Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -10775,6 +10775,35 @@
 This function returns the same values as the libm ``round``
 functions would, and handles error conditions in the same way.
 
+'``llvm.abs.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.abs`` on any
+integer type or vector of integer type.
+
+::
+
+      declare i16 @llvm.abs.i16(i16 <src>)
+      declare i32 @llvm.abs.i32(i32 <src>)
+      declare i64 @llvm.abs.i64(i64 <src>)
+
+Overview:
+"""""""""
+
+The '``llvm.abs``' family of intrinsics returns the unsigned absolute
+value of a signed integer value.
+
+Semantics:
+""""""""""
+
+The '``llvm.abs.iN``' intrinsic interprets its argument as a signed ``iN``
+value and returns its absolute value as an unsigned ``iN`` value.
+The absolute value of the minimum signed value is that value itself, i.e.
+the bit pattern is unchanged (e.g. abs(i16 -32768 (0x8000)) returns 0x8000,
+which is +32768 when read as unsigned).
+
 Bit Manipulation Intrinsics
 ---------------------------
Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -332,6 +332,9 @@
     /// Bitwise operators - logical and, logical or, logical xor.
     AND, OR, XOR,
 
+    /// Signed integer absolute.
+    ABS,
+
     /// Shift and rotation operations.  After legalization, the type of the
     /// shift amount is known to be TLI.getShiftAmountTy().  Before legalization
     /// the shift amount can be any type, but care must be taken to ensure it is
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -443,6 +443,7 @@
 
 // None of these intrinsics accesses memory at all.
 let IntrProperties = [IntrNoMem] in {
+  def int_abs: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
Index: include/llvm/Target/TargetSelectionDAG.td
===================================================================
--- include/llvm/Target/TargetSelectionDAG.td
+++ include/llvm/Target/TargetSelectionDAG.td
@@ -406,6 +406,7 @@
                         [SDNPCommutative, SDNPAssociative]>;
 def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
 
+def abs        : SDNode<"ISD::ABS"        , SDTIntUnaryOp>;
 def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
 def bswap      : SDNode<"ISD::BSWAP"      , SDTIntUnaryOp>;
 def ctlz       : SDNode<"ISD::CTLZ"       , SDTIntUnaryOp>;
Index: lib/Analysis/ConstantFolding.cpp
===================================================================
--- lib/Analysis/ConstantFolding.cpp
+++ lib/Analysis/ConstantFolding.cpp
@@ -1331,6 +1331,7 @@
   case Intrinsic::nearbyint:
   case Intrinsic::pow:
   case Intrinsic::powi:
+  case Intrinsic::abs:
   case Intrinsic::bswap:
   case Intrinsic::ctpop:
   case Intrinsic::ctlz:
@@ -1681,6 +1682,8 @@
     if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
       switch (IntrinsicID) {
+      case Intrinsic::abs:
+        return ConstantInt::get(Ty->getContext(), Op->getValue().abs());
       case Intrinsic::bswap:
         return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
       case Intrinsic::ctpop:
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -258,6 +258,7 @@
     SDValue visitSRA(SDNode *N);
     SDValue visitSRL(SDNode *N);
     SDValue visitRotate(SDNode *N);
+    SDValue visitABS(SDNode *N);
     SDValue visitBSWAP(SDNode *N);
     SDValue visitBITREVERSE(SDNode *N);
     SDValue visitCTLZ(SDNode *N);
@@ -1411,6 +1412,7 @@
   case ISD::SRL:                return visitSRL(N);
   case ISD::ROTR:
   case ISD::ROTL:               return visitRotate(N);
+  case ISD::ABS:                return visitABS(N);
   case ISD::BSWAP:              return visitBSWAP(N);
   case ISD::BITREVERSE:         return visitBITREVERSE(N);
   case ISD::CTLZ:               return visitCTLZ(N);
@@ -4355,6 +4357,17 @@
                                          N01C->getAPIntValue(), DL, VT));
     }
   }
+
+  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
+  unsigned OpSizeInBits = VT.getScalarSizeInBits();
+  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
+      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
+      TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+    if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+      if (C->getAPIntValue() == (OpSizeInBits - 1))
+        return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+  }
+
   // fold (xor x, x) -> 0
   if (N0 == N1)
     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
@@ -5084,6 +5097,22 @@
   return SDValue();
 }
 
+SDValue DAGCombiner::visitABS(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // fold (abs c1) -> c2
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+    return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+  // fold (abs (abs x)) -> (abs x)
+  if (N0.getOpcode() == ISD::ABS)
+    return N0;
+  // fold (abs x) -> x iff not-negative
+  if (DAG.SignBitIsZero(N0))
+    return N0;
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
=================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -147,6 +147,7 @@ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl); + SDValue ExpandABS(SDValue Op, const SDLoc &dl); SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl); @@ -2533,6 +2534,21 @@ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } +/// Legalize an integer absolute operation. +SDValue SelectionDAGLegalize::ExpandABS(SDValue Op, const SDLoc &dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + + // Branchless integer absolute. + // sign = x sra>> (scalarsizeinbits - 1) + // abs(x) = (x + sign) ^ sign. + SDLoc DL(Op); + SDValue Shift = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, SHVT); + SDValue Mask = DAG.getNode(ISD::SRA, DL, VT, Op, Shift); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, Op, Mask); + return DAG.getNode(ISD::XOR, DL, VT, Add, Mask); +} + /// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); @@ -2777,6 +2793,9 @@ Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; + case ISD::ABS: + Results.push_back(ExpandABS(Node->getOperand(0), dl)); + break; case ISD::BITREVERSE: Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); break; Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -123,6 +123,8 @@ case ISD::SUB: case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::ABS: Res = PromoteIntRes_SExtIntUnaryOp(N); break; + case ISD::SDIV: case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; @@ -647,6 +649,12 @@ LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntUnaryOp(SDNode *N) { + // Sign extend the input. + SDValue Src = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), Src.getValueType(), Src); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { // Sign extend the input. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); @@ -1313,6 +1321,7 @@ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break; case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; @@ -1866,6 +1875,26 @@ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } +void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + + // abs(HiLo) -> (Hi < 0 ? 
-HiLo : HiLo) + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, + DAG.getConstant(0, dl, VT), N0); + SDValue NegLo, NegHi; + SplitInteger(Neg, NegLo, NegHi); + + GetExpandedInteger(N0, Lo, Hi); + EVT NVT = Lo.getValueType(); + SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), + DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT); + Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo); + Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -271,6 +271,7 @@ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SExtIntUnaryOp(SDNode *N); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); SDValue PromoteIntRes_SRA(SDNode *N); SDValue PromoteIntRes_SRL(SDNode *N); @@ -345,6 +346,7 @@ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ABS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -105,6 +105,7 @@ SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); + SDValue ExpandABS(SDValue Op); SDValue ExpandBITREVERSE(SDValue Op); SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); @@ -276,6 +277,7 @@ case ISD::AND: case ISD::OR: case ISD::XOR: + case ISD::ABS: case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -691,6 +693,8 @@ return ExpandFNEG(Op); case ISD::SETCC: return UnrollVSETCC(Op); + case ISD::ABS: + return ExpandABS(Op); case ISD::BITREVERSE: return ExpandBITREVERSE(Op); case ISD::CTLZ_ZERO_UNDEF: @@ -880,6 +884,20 @@ return DAG.getNode(ISD::BITCAST, DL, VT, Op); } +SDValue VectorLegalizer::ExpandABS(SDValue Op) { + EVT VT = Op.getValueType(); + + // If we have the appropriate vector bit operations, it is better to use them + // than unrolling and expanding each component. + if (!TLI.isOperationLegalOrCustom(ISD::ADD, VT) || + !TLI.isOperationLegalOrCustom(ISD::SRA, VT) || + !TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) + return DAG.UnrollVectorOp(Op.getNode()); + + // Let LegalizeDAG handle this later. 
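+  // Returning the node unchanged keeps ISD::ABS in the DAG, so
+  // SelectionDAGLegalize::ExpandABS can emit the whole-vector
+  // SRA/ADD/XOR sequence checked for above instead of scalarizing.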
+ return Op; +} + SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { EVT VT = Op.getValueType(); Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -66,6 +66,7 @@ case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::ABS: case ISD::ANY_EXTEND: case ISD::BITREVERSE: case ISD::BSWAP: @@ -619,6 +620,7 @@ SplitVecRes_ExtVecInRegOp(N, Lo, Hi); break; + case ISD::ABS: case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CONVERT_RNDSAT: @@ -2134,6 +2136,7 @@ Res = WidenVecRes_Convert(N); break; + case ISD::ABS: case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3052,6 +3052,9 @@ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); break; + case ISD::ABS: + return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), C->isOpaque()); @@ -3149,6 +3152,7 @@ case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: + case ISD::ABS: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: @@ -3266,6 +3270,14 @@ if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); break; + case ISD::ABS: + assert(VT.isInteger() && VT == Operand.getValueType() && + "Invalid ABS!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + if (VT.getScalarType() == MVT::i1) + return Operand; + break; case ISD::BSWAP: assert(VT.isInteger() && VT == Operand.getValueType() && "Invalid BSWAP!"); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5335,6 +5335,11 @@ DAG.setRoot(Res.getValue(1)); return nullptr; } + case Intrinsic::abs: + setValue(&I, DAG.getNode(ISD::ABS, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return nullptr; case Intrinsic::bitreverse: setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, getValue(I.getArgOperand(0)).getValueType(), Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -315,6 +315,7 @@ case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; // Bit manipulation + case ISD::ABS: return "abs"; case ISD::BITREVERSE: return "bitreverse"; case ISD::BSWAP: return "bswap"; case ISD::CTPOP: return "ctpop"; Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -878,6 +878,7 @@ setOperationAction(ISD::SMAX, VT, Expand); setOperationAction(ISD::UMIN, VT, Expand); setOperationAction(ISD::UMAX, VT, Expand); + setOperationAction(ISD::ABS, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); Index: lib/Target/X86/X86ISelLowering.h 
=================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -238,9 +238,6 @@ FHADD, FHSUB, - // Integer absolute value - ABS, - // Detect Conflicts Within a Vector CONFLICT, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -886,6 +886,9 @@ } if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) { + setOperationAction(ISD::ABS, MVT::v16i8, Legal); + setOperationAction(ISD::ABS, MVT::v8i16, Legal); + setOperationAction(ISD::ABS, MVT::v4i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); @@ -1063,6 +1066,7 @@ setOperationAction(ISD::MULHS, MVT::v32i8, Custom); for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { + setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom); @@ -1262,6 +1266,8 @@ } } if (Subtarget.hasVLX()) { + setOperationAction(ISD::ABS, MVT::v4i64, Legal); + setOperationAction(ISD::ABS, MVT::v2i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); @@ -1360,6 +1366,7 @@ setOperationAction(ISD::MUL, MVT::v16i32, Legal); for (auto VT : { MVT::v16i32, MVT::v8i64 }) { + setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); @@ -1539,6 +1546,7 @@ for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); @@ -19748,6 +19756,25 @@ return Lower256IntArith(Op, DAG); } +static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) { + assert(Op.getSimpleValueType().is256BitVector() && + Op.getSimpleValueType().isInteger() && + "Only handle AVX 256-bit vector integer operation"); + MVT VT = Op.getSimpleValueType(); + unsigned NumElems = VT.getVectorNumElements(); + + SDLoc dl(Op); + SDValue Src = Op.getOperand(0); + SDValue Lo = extract128BitVector(Src, 0, DAG, dl); + SDValue Hi = extract128BitVector(Src, NumElems / 2, DAG, dl); + + MVT EltVT = VT.getVectorElementType(); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + DAG.getNode(Op.getOpcode(), dl, NewVT, Lo), + DAG.getNode(Op.getOpcode(), dl, NewVT, Hi)); +} + static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && @@ -22251,6 +22278,7 @@ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); + case ISD::ABS: return LowerABS(Op, DAG); case ISD::SMAX: case ISD::SMIN: case ISD::UMAX: @@ -22648,7 +22676,6 @@ case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; - case X86ISD::ABS: return "X86ISD::ABS"; case X86ISD::CONFLICT: 
return "X86ISD::CONFLICT"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND"; Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -8310,57 +8310,7 @@ HasBWI>; } -defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>; - -let Predicates = [HasBWI, HasVLX] in { - def : Pat<(xor - (bc_v2i64 (v16i1sextv16i8)), - (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), - (VPABSBZ128rr VR128:$src)>; - def : Pat<(xor - (bc_v2i64 (v8i1sextv8i16)), - (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), - (VPABSWZ128rr VR128:$src)>; - def : Pat<(xor - (bc_v4i64 (v32i1sextv32i8)), - (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), - (VPABSBZ256rr VR256:$src)>; - def : Pat<(xor - (bc_v4i64 (v16i1sextv16i16)), - (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), - (VPABSWZ256rr VR256:$src)>; -} -let Predicates = [HasAVX512, HasVLX] in { - def : Pat<(xor - (bc_v2i64 (v4i1sextv4i32)), - (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), - (VPABSDZ128rr VR128:$src)>; - def : Pat<(xor - (bc_v4i64 (v8i1sextv8i32)), - (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), - (VPABSDZ256rr VR256:$src)>; -} - -let Predicates = [HasAVX512] in { -def : Pat<(xor - (bc_v8i64 (v16i1sextv16i32)), - (bc_v8i64 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), - (VPABSDZrr VR512:$src)>; -def : Pat<(xor - (bc_v8i64 (v8i1sextv8i64)), - (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), - (VPABSQZrr VR512:$src)>; -} -let Predicates = [HasBWI] in { -def : Pat<(xor - (bc_v8i64 (v64i1sextv64i8)), - (bc_v8i64 (add (v64i8 VR512:$src), (v64i1sextv64i8)))), - (VPABSBZrr VR512:$src)>; -def : Pat<(xor - (bc_v8i64 (v32i1sextv32i16)), - (bc_v8i64 (add (v32i16 VR512:$src), (v32i1sextv32i16)))), - (VPABSWZrr VR512:$src)>; -} +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>; multiclass avx512_ctlz opc, string OpcodeStr, Predicate prd>{ Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -352,7 +352,6 @@ def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; -def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>; def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -5330,7 +5330,6 @@ // SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// - /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm opc, string OpcodeStr, ValueType vt, SDNode OpNode, PatFrag ld_frag> { @@ -5365,84 +5364,25 @@ Sched<[WriteVecALULd]>; } -// Helper fragments to match sext vXi1 to vXiY. 
-def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)), - VR128:$src))>; -def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>; -def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>; -def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)), - VR256:$src))>; -def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>; -def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>; - -let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, X86Abs, loadv2i64>, VEX; - defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, X86Abs, loadv2i64>, VEX; -} -let Predicates = [HasAVX, NoVLX] in { - defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, X86Abs, loadv2i64>, VEX; -} - let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - def : Pat<(xor - (bc_v2i64 (v16i1sextv16i8)), - (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), - (VPABSBrr VR128:$src)>; - def : Pat<(xor - (bc_v2i64 (v8i1sextv8i16)), - (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), - (VPABSWrr VR128:$src)>; + defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX; + defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX; } let Predicates = [HasAVX, NoVLX] in { - def : Pat<(xor - (bc_v2i64 (v4i1sextv4i32)), - (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), - (VPABSDrr VR128:$src)>; + defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, X86Abs>, VEX, VEX_L; - defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, X86Abs>, VEX, VEX_L; + defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L; + defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L; } let Predicates = [HasAVX2, NoVLX] in { - defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, X86Abs>, VEX, VEX_L; + defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L; } -let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - def : Pat<(xor - (bc_v4i64 (v32i1sextv32i8)), - (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), - (VPABSBYrr VR256:$src)>; - def : Pat<(xor - (bc_v4i64 (v16i1sextv16i16)), - (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), - (VPABSWYrr VR256:$src)>; -} -let Predicates = [HasAVX2, NoVLX] in { - def : Pat<(xor - (bc_v4i64 (v8i1sextv8i32)), - (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), - (VPABSDYrr VR256:$src)>; -} - -defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, X86Abs, memopv2i64>; -defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, X86Abs, memopv2i64>; -defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, X86Abs, memopv2i64>; - -let Predicates = [UseSSSE3] in { - def : Pat<(xor - (bc_v2i64 (v16i1sextv16i8)), - (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), - (PABSBrr VR128:$src)>; - def : Pat<(xor - (bc_v2i64 (v8i1sextv8i16)), - (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), - (PABSWrr VR128:$src)>; - def : Pat<(xor - (bc_v2i64 (v4i1sextv4i32)), - (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), - (PABSDrr VR128:$src)>; -} +defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, memopv2i64>; +defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>; +defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>; //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions Index: lib/Target/X86/X86IntrinsicsInfo.h 
=================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -275,9 +275,9 @@ X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), - X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, ISD::ABS, 0), X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), @@ -803,18 +803,18 @@ X86ISD::FMUL_RND, 0), X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMUL_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), @@ -1690,9 +1690,9 @@ X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0), X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0), - 
X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, ISD::ABS, 0), X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0), Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1441,6 +1441,22 @@ break; } + case Intrinsic::abs: { + Type *Ty = II->getType(); + Value *IIOperand = II->getArgOperand(0); + + // abs(i1 x) -> x + if (Ty->getScalarType()->isIntegerTy(1)) + return replaceInstUsesWith(CI, IIOperand); + // abs(abs(x)) -> abs(x) + if (match(IIOperand, m_Intrinsic())) + return replaceInstUsesWith(CI, IIOperand); + // fold (abs x) -> x iff not-negative + if (isKnownNonNegative(IIOperand, getDataLayout())) + return replaceInstUsesWith(CI, IIOperand); + break; + } + case Intrinsic::bitreverse: { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; Index: test/CodeGen/X86/combine-abs.ll =================================================================== --- test/CodeGen/X86/combine-abs.ll +++ test/CodeGen/X86/combine-abs.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX + +; fold (abs c1) -> c +define <4 x i32> @combine_fold_abs_v4i32() { +; SSE-LABEL: combine_fold_abs_v4i32: +; SSE: # BB#0: +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,3,0,2147483648] +; SSE-NEXT: retq +; +; AVX-LABEL: combine_fold_abs_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,3,0,2147483648] +; AVX-NEXT: retq + %1 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> ) + ret <4 x i32> %1 +} + +; fold (abs (abs x)) -> (abs x) +define <4 x i32> @combine_abs_abs_v4i32(<4 x i32> %x) { +; SSE2-LABEL: combine_abs_abs_v4i32: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: paddd %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: combine_abs_abs_v4i32: +; SSE41: # BB#0: +; SSE41-NEXT: pabsd %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: combine_abs_abs_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vpabsd %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x) + %2 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %1) + ret <4 x i32> %2 +} + +; fold (abs x) -> x iff not-negative +define <4 x i32> @combine_abs_and_v4i32(<4 x i32> %x) { +; SSE-LABEL: combine_abs_and_v4i32: +; SSE: # BB#0: +; SSE-NEXT: andps {{.*}}(%rip), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: combine_abs_and_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = and <4 x i32> %x, + %2 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %1) + ret <4 x i32> %2 +} + +declare <4 x i32> 
@llvm.abs.v4i32(<4 x i32>) readnone Index: test/CodeGen/X86/legalize-abs.ll =================================================================== --- test/CodeGen/X86/legalize-abs.ll +++ test/CodeGen/X86/legalize-abs.ll @@ -0,0 +1,279 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-SSE --check-prefix=X32-SSE2 +; RUN: llc < %s -mtriple=i686-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=X32 --check-prefix=X32-SSE --check-prefix=X32-SSSE3 +; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX --check-prefix=X32-AVX1 +; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX --check-prefix=X32-AVX2 +; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX --check-prefix=X32-AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSSE3 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX512 + +declare i1 @llvm.abs.i1(i1) readnone +declare i27 @llvm.abs.i27(i27) readnone +declare i64 @llvm.abs.i64(i64) readnone +declare i128 @llvm.abs.i128(i128) readnone +declare <4 x i31> @llvm.abs.v4i31(<4 x i31>) readnone +declare <2 x i33> @llvm.abs.v2i33(<2 x i33>) readnone + +define i1 @test_abs_i1(i1 %a) nounwind { +; X32-LABEL: test_abs_i1: +; X32: # BB#0: +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: retl +; +; X64-LABEL: test_abs_i1: +; X64: # BB#0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: retq + %b = call i1 @llvm.abs.i1(i1 %a) + ret i1 %b +} + +define i27 @test_abs_i27(i27 %a) nounwind { +; X32-LABEL: test_abs_i27: +; X32: # BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: shll $5, %ecx +; X32-NEXT: sarl $5, %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: negl %eax +; X32-NEXT: cmovll %ecx, %eax +; X32-NEXT: retl +; +; X64-LABEL: test_abs_i27: +; X64: # BB#0: +; X64-NEXT: shll $5, %edi +; X64-NEXT: sarl $5, %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmovll %edi, %eax +; X64-NEXT: retq + %b = call i27 @llvm.abs.i27(i27 %a) + ret i27 %b +} + +define i64 @test_abs_i64(i64 %a) nounwind { +; X32-LABEL: test_abs_i64: +; X32: # BB#0: +; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: negl %eax +; X32-NEXT: sbbl %esi, %edx +; X32-NEXT: testl %esi, %esi +; X32-NEXT: cmovnsl %ecx, %eax +; X32-NEXT: cmovnsl %esi, %edx +; X32-NEXT: popl %esi +; X32-NEXT: retl +; +; X64-LABEL: test_abs_i64: +; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: negq %rax +; X64-NEXT: cmovlq %rdi, %rax +; X64-NEXT: retq + %b = call i64 @llvm.abs.i64(i64 %a) + ret i64 %b +} + +define i128 @test_abs_i128(i128 %a) nounwind { +; X32-LABEL: test_abs_i128: +; X32: # BB#0: +; X32-NEXT: pushl %ebp +; X32-NEXT: 
pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: xorl %esi, %esi +; X32-NEXT: negl %edi +; X32-NEXT: movl $0, %ebx +; X32-NEXT: sbbl %edx, %ebx +; X32-NEXT: movl $0, %ebp +; X32-NEXT: sbbl %ecx, %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: sbbl %eax, %esi +; X32-NEXT: testl %eax, %eax +; X32-NEXT: cmovnsl %eax, %esi +; X32-NEXT: cmovnsl %ecx, %ebp +; X32-NEXT: cmovnsl %edx, %ebx +; X32-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %edi, (%eax) +; X32-NEXT: movl %ebx, 4(%eax) +; X32-NEXT: movl %ebp, 8(%eax) +; X32-NEXT: movl %esi, 12(%eax) +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp +; X32-NEXT: retl $4 +; +; X64-LABEL: test_abs_i128: +; X64: # BB#0: +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: negq %rax +; X64-NEXT: sbbq %rsi, %rdx +; X64-NEXT: testq %rsi, %rsi +; X64-NEXT: cmovnsq %rdi, %rax +; X64-NEXT: cmovnsq %rsi, %rdx +; X64-NEXT: retq + %b = call i128 @llvm.abs.i128(i128 %a) + ret i128 %b +} + +define <4 x i31> @test_abs_v4i31(<4 x i31> %a) nounwind { +; X32-SSE2-LABEL: test_abs_v4i31: +; X32-SSE2: # BB#0: +; X32-SSE2-NEXT: pslld $1, %xmm0 +; X32-SSE2-NEXT: psrad $1, %xmm0 +; X32-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE2-NEXT: psrad $31, %xmm1 +; X32-SSE2-NEXT: paddd %xmm1, %xmm0 +; X32-SSE2-NEXT: pxor %xmm1, %xmm0 +; X32-SSE2-NEXT: retl +; +; X32-SSSE3-LABEL: test_abs_v4i31: +; X32-SSSE3: # BB#0: +; X32-SSSE3-NEXT: pslld $1, %xmm0 +; X32-SSSE3-NEXT: psrad $1, %xmm0 +; X32-SSSE3-NEXT: pabsd %xmm0, %xmm0 +; X32-SSSE3-NEXT: retl +; +; X32-AVX-LABEL: test_abs_v4i31: +; X32-AVX: # BB#0: +; X32-AVX-NEXT: vpslld $1, %xmm0, %xmm0 +; X32-AVX-NEXT: vpsrad $1, %xmm0, %xmm0 +; X32-AVX-NEXT: vpabsd %xmm0, %xmm0 +; X32-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_abs_v4i31: +; X64-SSE2: # BB#0: +; X64-SSE2-NEXT: pslld $1, %xmm0 +; X64-SSE2-NEXT: psrad $1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: psrad $31, %xmm1 +; X64-SSE2-NEXT: paddd %xmm1, %xmm0 +; X64-SSE2-NEXT: pxor %xmm1, %xmm0 +; X64-SSE2-NEXT: retq +; +; X64-SSSE3-LABEL: test_abs_v4i31: +; X64-SSSE3: # BB#0: +; X64-SSSE3-NEXT: pslld $1, %xmm0 +; X64-SSSE3-NEXT: psrad $1, %xmm0 +; X64-SSSE3-NEXT: pabsd %xmm0, %xmm0 +; X64-SSSE3-NEXT: retq +; +; X64-AVX-LABEL: test_abs_v4i31: +; X64-AVX: # BB#0: +; X64-AVX-NEXT: vpslld $1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrad $1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpabsd %xmm0, %xmm0 +; X64-AVX-NEXT: retq + %b = call <4 x i31> @llvm.abs.v4i31(<4 x i31> %a) + ret <4 x i31> %b +} + +define <2 x i33> @test_abs_v2i33(<2 x i33> %a) nounwind { +; X32-SSE-LABEL: test_abs_v2i33: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: psllq $31, %xmm0 +; X32-SSE-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE-NEXT: psrad $31, %xmm1 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; X32-SSE-NEXT: psrlq $31, %xmm0 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-SSE-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE-NEXT: psrad $31, %xmm1 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X32-SSE-NEXT: paddq %xmm1, %xmm0 +; X32-SSE-NEXT: pxor %xmm1, %xmm0 +; X32-SSE-NEXT: retl +; +; X32-AVX1-LABEL: test_abs_v2i33: +; X32-AVX1: # BB#0: +; X32-AVX1-NEXT: vpsllq $31, %xmm0, %xmm0 +; X32-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; X32-AVX1-NEXT: vpsrlq $31, %xmm0, %xmm0 +; 
X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; X32-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; X32-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X32-AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; X32-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X32-AVX1-NEXT: retl +; +; X32-AVX2-LABEL: test_abs_v2i33: +; X32-AVX2: # BB#0: +; X32-AVX2-NEXT: vpsllq $31, %xmm0, %xmm0 +; X32-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1 +; X32-AVX2-NEXT: vpsrlq $31, %xmm0, %xmm0 +; X32-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; X32-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1 +; X32-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X32-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; X32-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X32-AVX2-NEXT: retl +; +; X32-AVX512-LABEL: test_abs_v2i33: +; X32-AVX512: # BB#0: +; X32-AVX512-NEXT: vpsllq $31, %xmm0, %xmm0 +; X32-AVX512-NEXT: vpsraq $31, %xmm0, %xmm0 +; X32-AVX512-NEXT: vpabsq %xmm0, %xmm0 +; X32-AVX512-NEXT: retl +; +; X64-SSE-LABEL: test_abs_v2i33: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: psllq $31, %xmm0 +; X64-SSE-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE-NEXT: psrad $31, %xmm1 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; X64-SSE-NEXT: psrlq $31, %xmm0 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-SSE-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE-NEXT: psrad $31, %xmm1 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X64-SSE-NEXT: paddq %xmm1, %xmm0 +; X64-SSE-NEXT: pxor %xmm1, %xmm0 +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: test_abs_v2i33: +; X64-AVX1: # BB#0: +; X64-AVX1-NEXT: vpsllq $31, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; X64-AVX1-NEXT: vpsrlq $31, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; X64-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X64-AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_abs_v2i33: +; X64-AVX2: # BB#0: +; X64-AVX2-NEXT: vpsllq $31, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpsrlq $31, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; X64-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: test_abs_v2i33: +; X64-AVX512: # BB#0: +; X64-AVX512-NEXT: vpsllq $31, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpsraq $31, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpabsq %xmm0, %xmm0 +; X64-AVX512-NEXT: retq + %b = call <2 x i33> @llvm.abs.v2i33(<2 x i33> %a) + ret <2 x i33> %b +} Index: test/CodeGen/X86/viabs.ll =================================================================== --- test/CodeGen/X86/viabs.ll +++ test/CodeGen/X86/viabs.ll @@ -147,14 +147,10 @@ ; ; AVX1-LABEL: test_abs_gt_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, 
%xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_gt_v8i32: @@ -193,14 +189,10 @@ ; ; AVX1-LABEL: test_abs_ge_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_ge_v8i32: @@ -239,14 +231,10 @@ ; ; AVX1-LABEL: test_abs_gt_v16i16: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsraw $15, %xmm1, %xmm2 -; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsraw $15, %xmm0, %xmm3 -; AVX1-NEXT: vpaddw %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsw %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsw %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_gt_v16i16: @@ -285,15 +273,10 @@ ; ; AVX1-LABEL: test_abs_lt_v32i8: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm3 -; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm4 -; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vxorps %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vpabsb %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsb %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_lt_v32i8: @@ -332,14 +315,10 @@ ; ; AVX1-LABEL: test_abs_le_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_le_v8i32: @@ -388,22 +367,14 @@ ; ; AVX1-LABEL: test_abs_le_16i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm4 -; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm4 -; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vpabsd %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: 
vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vpabsd %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpabsd %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_le_16i32: @@ -450,9 +421,7 @@ ; ; AVX512-LABEL: test_abs_ge_v2i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpsraq $63, %xmm0, %xmm1 -; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpxorq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpabsq %xmm0, %xmm0 ; AVX512-NEXT: retq %tmp1neg = sub <2 x i64> zeroinitializer, %a %b = icmp sge <2 x i64> %a, zeroinitializer @@ -499,9 +468,7 @@ ; ; AVX512-LABEL: test_abs_gt_v4i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpsraq $63, %ymm0, %ymm1 -; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpxorq %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpabsq %ymm0, %ymm0 ; AVX512-NEXT: retq %tmp1neg = sub <4 x i64> zeroinitializer, %a %b = icmp sgt <4 x i64> %a, @@ -691,23 +658,14 @@ ; ; AVX1-LABEL: test_abs_lt_v64i8: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4 -; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm5 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm6 -; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vxorps %ymm6, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4 -; AVX1-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm3 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm5 -; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1 +; AVX1-NEXT: vpabsb %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsb %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vpabsb %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpabsb %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_lt_v64i8: @@ -763,22 +721,14 @@ ; ; AVX1-LABEL: test_abs_gt_v32i16: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3 -; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm0, %xmm4 -; AVX1-NEXT: vpaddw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3 -; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm1, %xmm4 -; AVX1-NEXT: vpaddw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vpabsw %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsw %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vpabsw %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpabsw %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_gt_v32i16: @@ -802,3 +752,112 @@ %abs = select <32 x i1> %b, <32 x i16> %a, <32 x i16> %tmp1neg ret <32 x i16> %abs } + +; +; ISD::ABS Tests +; + +declare <2 x i64> @llvm.abs.v2i64(<2 x i64>) readnone +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>) readnone +declare <8 x i16> @llvm.abs.v8i16(<8 x 
i16>) readnone +declare <16 x i8> @llvm.abs.v16i8(<16 x i8>) readnone + +define <2 x i64> @test_abs_v2i64(<2 x i64> %a) nounwind { +; SSE-LABEL: test_abs_v2i64: +; SSE: # BB#0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrad $31, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE-NEXT: paddq %xmm1, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: test_abs_v2i64: +; AVX1: # BB#0: +; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_abs_v2i64: +; AVX2: # BB#0: +; AVX2-NEXT: vpsrad $31, %xmm0, %xmm1 +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_abs_v2i64: +; AVX512: # BB#0: +; AVX512-NEXT: vpabsq %xmm0, %xmm0 +; AVX512-NEXT: retq + %b = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a) + ret <2 x i64> %b +} + +define <4 x i32> @test_abs_v4i32(<4 x i32> %a) nounwind { +; SSE2-LABEL: test_abs_v4i32: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: paddd %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: test_abs_v4i32: +; SSSE3: # BB#0: +; SSSE3-NEXT: pabsd %xmm0, %xmm0 +; SSSE3-NEXT: retq +; +; AVX-LABEL: test_abs_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vpabsd %xmm0, %xmm0 +; AVX-NEXT: retq + %b = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a) + ret <4 x i32> %b +} + +define <8 x i16> @test_abs_v8i16(<8 x i16> %a) nounwind { +; SSE2-LABEL: test_abs_v8i16: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psraw $15, %xmm1 +; SSE2-NEXT: paddw %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: test_abs_v8i16: +; SSSE3: # BB#0: +; SSSE3-NEXT: pabsw %xmm0, %xmm0 +; SSSE3-NEXT: retq +; +; AVX-LABEL: test_abs_v8i16: +; AVX: # BB#0: +; AVX-NEXT: vpabsw %xmm0, %xmm0 +; AVX-NEXT: retq + %b = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a) + ret <8 x i16> %b +} + +define <16 x i8> @test_abs_v16i8(<16 x i8> %a) nounwind { +; SSE2-LABEL: test_abs_v16i8: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; SSE2-NEXT: paddb %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: test_abs_v16i8: +; SSSE3: # BB#0: +; SSSE3-NEXT: pabsb %xmm0, %xmm0 +; SSSE3-NEXT: retq +; +; AVX-LABEL: test_abs_v16i8: +; AVX: # BB#0: +; AVX-NEXT: vpabsb %xmm0, %xmm0 +; AVX-NEXT: retq + %b = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a) + ret <16 x i8> %b +} Index: test/Transforms/InstCombine/abs.ll =================================================================== --- test/Transforms/InstCombine/abs.ll +++ test/Transforms/InstCombine/abs.ll @@ -0,0 +1,160 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +declare i1 @llvm.abs.i1(i1) readnone +declare i8 @llvm.abs.i8(i8) readnone +declare i16 @llvm.abs.i16(i16) readnone +declare i32 @llvm.abs.i32(i32) readnone +declare i64 @llvm.abs.i64(i64) readnone +declare i128 @llvm.abs.i128(i128) readnone + +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>) readnone +declare <2 x i64> @llvm.abs.v2i64(<2 x i64>) readnone + +; +; Folds +; + +define i1 @fold_abs_abs_i1(i1 %a) { +; 
CHECK-LABEL: @fold_abs_abs_i1( +; CHECK-NEXT: ret i1 %a +; + %1 = call i1 @llvm.abs.i1(i1 %a) + %2 = call i1 @llvm.abs.i1(i1 %1) + ret i1 %2 +} + +define i32 @fold_abs_abs_i32(i32 %a) { +; CHECK-LABEL: @fold_abs_abs_i32( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.abs.i32(i32 %a) +; CHECK-NEXT: ret i32 [[TMP1]] +; + %1 = call i32 @llvm.abs.i32(i32 %a) + %2 = call i32 @llvm.abs.i32(i32 %1) + ret i32 %2 +} + +define <4 x i32> @fold_abs_abs_v4i32(<4 x i32> %a) { +; CHECK-LABEL: @fold_abs_abs_v4i32( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %1 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a) + %2 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %1) + ret <4 x i32> %2 +} + +define i16 @fold_abs_mask_i16(i16 %a) { +; CHECK-LABEL: @fold_abs_mask_i16( +; CHECK-NEXT: [[TMP1:%.*]] = and i16 %a, 32765 +; CHECK-NEXT: ret i16 [[TMP1]] +; + %1 = and i16 %a, 32765 + %2 = call i16 @llvm.abs.i16(i16 %1) + ret i16 %2 +} + +define <4 x i32> @fold_abs_and_v4i32(<4 x i32> %x) { +; CHECK-LABEL: @fold_abs_and_v4i32( +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> %x, +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %1 = and <4 x i32> %x, + %2 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %1) + ret <4 x i32> %2 +} + +; +; Constant Folding +; + +define i1 @constant_fold_i1_false() { +; CHECK-LABEL: @constant_fold_i1_false( +; CHECK-NEXT: ret i1 false +; + %1 = call i1 @llvm.abs.i1(i1 0) + ret i1 %1 +} + +define i1 @constant_fold_i1_true() { +; CHECK-LABEL: @constant_fold_i1_true( +; CHECK-NEXT: ret i1 true +; + %1 = call i1 @llvm.abs.i1(i1 -1) + ret i1 %1 +} + +define i8 @constant_fold_i8() { +; CHECK-LABEL: @constant_fold_i8( +; CHECK-NEXT: ret i8 3 +; + %1 = call i8 @llvm.abs.i8(i8 -3) + ret i8 %1 +} + +define i8 @constant_fold_i8_min() { +; CHECK-LABEL: @constant_fold_i8_min( +; CHECK-NEXT: ret i8 -128 +; + %1 = call i8 @llvm.abs.i8(i8 -128) + ret i8 %1 +} + +define i16 @constant_fold_i16() { +; CHECK-LABEL: @constant_fold_i16( +; CHECK-NEXT: ret i16 555 +; + %1 = call i16 @llvm.abs.i16(i16 555) + ret i16 %1 +} + +define i32 @constant_fold_i32() { +; CHECK-LABEL: @constant_fold_i32( +; CHECK-NEXT: ret i32 32769 +; + %1 = call i32 @llvm.abs.i32(i32 -32769) + ret i32 %1 +} + +define i64 @constant_fold_i64() { +; CHECK-LABEL: @constant_fold_i64( +; CHECK-NEXT: ret i64 65535 +; + %1 = call i64 @llvm.abs.i64(i64 65535) + ret i64 %1 +} + +define i64 @constant_fold_i64_min() { +; CHECK-LABEL: @constant_fold_i64_min( +; CHECK-NEXT: ret i64 -9223372036854775808 +; + %1 = call i64 @llvm.abs.i64(i64 -9223372036854775808) + ret i64 %1 +} + +define i128 @constant_fold_i128() { +; CHECK-LABEL: @constant_fold_i128( +; CHECK-NEXT: ret i128 36893488147419103232 +; + %1 = shl i128 1, 65 + %2 = call i128 @llvm.abs.i128(i128 %1) + ret i128 %2 +} + +define <4 x i32> @constant_fold_v4i32() { +; CHECK-LABEL: @constant_fold_v4i32( +; CHECK-NEXT: ret <4 x i32> +; + %1 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> ) + ret <4 x i32> %1 +} + +define <2 x i64> @constant_fold_v2i64() { +; CHECK-LABEL: @constant_fold_v2i64( +; CHECK-NEXT: ret <2 x i64> +; + %1 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> ) + ret <2 x i64> %1 +}
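
Illustrative follow-up (not part of this patch): the new fold added to
``DAGCombiner::visitXOR`` rewrites the branchless-abs idiom
``xor (add X, (sra X, bits-1)), (sra X, bits-1)`` into ``ISD::ABS`` whenever
ABS is legal or custom for the type. A sketch of an extra llc test that would
exercise the fold directly from generic IR, rather than through the intrinsic;
the function name and expectations below are hypothetical, assuming the same
RUN lines as viabs.ll::

  define <4 x i32> @combine_sra_add_xor_v4i32(<4 x i32> %x) nounwind {
    ; sign = x >>s 31 ; abs = (x + sign) ^ sign
    %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
    %sum  = add <4 x i32> %x, %sign
    %abs  = xor <4 x i32> %sum, %sign
    ret <4 x i32> %abs
  }

With SSSE3 or AVX enabled, where ISD::ABS is now Legal for v4i32, this should
select a single (v)pabsd; on targets where ABS is not legal or custom the fold
is skipped and the original psrad/paddd/pxor sequence is kept.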