Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -339,6 +339,12 @@ /// Bitwise operators - logical and, logical or, logical xor. AND, OR, XOR, + /// ABS - Determine the unsigned absolute value of a signed integer value of + /// the same bitwidth. + /// Note: A value of INT_MIN will return INT_MIN, no saturation or overflow + /// is performed. + ABS, + /// Shift and rotation operations. After legalization, the type of the /// shift amount is known to be TLI.getShiftAmountTy(). Before legalization /// the shift amount can be any type, but care must be taken to ensure it is Index: include/llvm/Target/TargetSelectionDAG.td =================================================================== --- include/llvm/Target/TargetSelectionDAG.td +++ include/llvm/Target/TargetSelectionDAG.td @@ -406,6 +406,7 @@ [SDNPCommutative, SDNPAssociative]>; def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>; +def abs : SDNode<"ISD::ABS" , SDTIntUnaryOp>; def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>; def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>; Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -260,6 +260,7 @@ SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitRotate(SDNode *N); + SDValue visitABS(SDNode *N); SDValue visitBSWAP(SDNode *N); SDValue visitBITREVERSE(SDNode *N); SDValue visitCTLZ(SDNode *N); @@ -1424,6 +1425,7 @@ case ISD::SRL: return visitSRL(N); case ISD::ROTR: case ISD::ROTL: return visitRotate(N); + case ISD::ABS: return visitABS(N); case ISD::BSWAP: return visitBSWAP(N); case ISD::BITREVERSE: return visitBITREVERSE(N); case ISD::CTLZ: return visitCTLZ(N);
@@ -4753,6 +4755,27 @@
                                          N01C->getAPIntValue(), DL, VT));
     }
   }
+
+  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
+  // xor and add are both commutative, so accept either operand order for each
+  // of them instead of only the literal (xor (add X, Y), Y) shape.
+  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+    SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
+    SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
+    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
+      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
+      SDValue S0 = S.getOperand(0);
+      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
+        // The shift amount must be the scalar bit width minus one, so that Y
+        // is the sign bit of X replicated into every bit.
+        unsigned OpSizeInBits = VT.getScalarSizeInBits();
+        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
+          if (C->getAPIntValue() == (OpSizeInBits - 1))
+            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+      }
+    }
+  }
+
   // fold (xor x, x) -> 0
   if (N0 == N1)
     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
@@ -5485,6 +5508,25 @@
   return SDValue();
 }
 
+/// Combine an ISD::ABS node: fold a constant operand, collapse abs-of-abs, and
+/// drop the abs when the operand's sign bit is known zero. Returns an empty
+/// SDValue when no fold applies.
+SDValue DAGCombiner::visitABS(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // fold (abs c1) -> c2
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+    return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+  // fold (abs (abs x)) -> (abs x)
+  if (N0.getOpcode() == ISD::ABS)
+    return N0;
+  // fold (abs x) -> x iff not-negative
+  if (DAG.SignBitIsZero(N0))
+    return N0;
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3269,6 +3269,9 @@ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT); break; + case ISD::ABS: + return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::BITREVERSE: return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(), C->isOpaque()); @@ -3388,6 +3391,7 @@ case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: + case ISD::ABS: case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: @@ -3505,6 +3509,12 @@ } if (OpOpcode == ISD::UNDEF) return
getUNDEF(VT); + break; + case ISD::ABS: + assert(VT.isInteger() && VT == Operand.getValueType() && + "Invalid ABS!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); break; case ISD::BSWAP: assert(VT.isInteger() && VT == Operand.getValueType() && Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -300,6 +300,7 @@ case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; // Bit manipulation + case ISD::ABS: return "abs"; case ISD::BITREVERSE: return "bitreverse"; case ISD::BSWAP: return "bswap"; case ISD::CTPOP: return "ctpop"; Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -900,6 +900,7 @@ setOperationAction(ISD::SMAX, VT, Expand); setOperationAction(ISD::UMIN, VT, Expand); setOperationAction(ISD::UMAX, VT, Expand); + setOperationAction(ISD::ABS, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -239,9 +239,6 @@ FHADD, FHSUB, - // Integer absolute value - ABS, - // Detect Conflicts Within a Vector CONFLICT, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -889,6 +889,9 @@ } if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) { + setOperationAction(ISD::ABS, MVT::v16i8, Legal); + setOperationAction(ISD::ABS, MVT::v8i16, Legal); + setOperationAction(ISD::ABS, MVT::v4i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); setOperationAction(ISD::CTLZ, 
MVT::v16i8, Custom); setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); @@ -1066,6 +1069,7 @@ setOperationAction(ISD::MULHS, MVT::v32i8, Custom); for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { + setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom); @@ -1272,6 +1276,8 @@ } } if (Subtarget.hasVLX()) { + setOperationAction(ISD::ABS, MVT::v4i64, Legal); + setOperationAction(ISD::ABS, MVT::v2i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); @@ -1368,6 +1374,7 @@ setOperationAction(ISD::MUL, MVT::v16i32, Legal); for (auto VT : { MVT::v16i32, MVT::v8i64 }) { + setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); @@ -1547,6 +1554,7 @@ for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom);
@@ -20957,6 +20965,25 @@
   return Lower256IntArith(Op, DAG);
 }
 
+/// Lower a 256-bit integer vector ISD::ABS by splitting the operand into two
+/// 128-bit halves, taking ISD::ABS of each half and concatenating the results.
+static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getSimpleValueType();
+  assert(VT.is256BitVector() && VT.isInteger() &&
+         "Only handle AVX 256-bit vector integer operation");
+
+  SDLoc dl(Op);
+  SDValue Src = Op.getOperand(0);
+  unsigned HalfNumElems = VT.getVectorNumElements() / 2;
+  MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElems);
+
+  SDValue Lo = extract128BitVector(Src, 0, DAG, dl);
+  SDValue Hi = extract128BitVector(Src, HalfNumElems, DAG, dl);
+  SDValue AbsLo = DAG.getNode(ISD::ABS, dl, HalfVT, Lo);
+  SDValue AbsHi = DAG.getNode(ISD::ABS, dl, HalfVT, Hi);
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, AbsLo, AbsHi);
+}
+
 static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
   assert(Op.getSimpleValueType().is256BitVector() &&
          Op.getSimpleValueType().isInteger() &&
@@ -23501,6 +23528,7 @@ case ISD::SMIN: case ISD::UMAX: case ISD::UMIN: return LowerMINMAX(Op, DAG); + case ISD::ABS: return LowerABS(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG); case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG); case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG); @@ -23913,7 +23941,6 @@ case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; - case X86ISD::ABS: return "X86ISD::ABS"; case X86ISD::CONFLICT: return "X86ISD::CONFLICT"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMAXS: return "X86ISD::FMAXS"; Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -8567,66 +8567,7 @@ HasBWI>; } -defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>; - -def avx512_v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)), - VR128X:$src))>; -def avx512_v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128X:$src, (i8 15)))>; -def avx512_v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128X:$src, (i8 31)))>; -def avx512_v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)), - VR256X:$src))>; -def avx512_v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256X:$src, (i8 15)))>; -def avx512_v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256X:$src, (i8 31)))>; - -let Predicates = [HasBWI, HasVLX] in { - def : Pat<(xor - (bc_v2i64 (avx512_v16i1sextv16i8)), - (bc_v2i64 (add (v16i8 VR128X:$src), (avx512_v16i1sextv16i8)))), - (VPABSBZ128rr VR128X:$src)>; - def : 
Pat<(xor - (bc_v2i64 (avx512_v8i1sextv8i16)), - (bc_v2i64 (add (v8i16 VR128X:$src), (avx512_v8i1sextv8i16)))), - (VPABSWZ128rr VR128X:$src)>; - def : Pat<(xor - (bc_v4i64 (avx512_v32i1sextv32i8)), - (bc_v4i64 (add (v32i8 VR256X:$src), (avx512_v32i1sextv32i8)))), - (VPABSBZ256rr VR256X:$src)>; - def : Pat<(xor - (bc_v4i64 (avx512_v16i1sextv16i16)), - (bc_v4i64 (add (v16i16 VR256X:$src), (avx512_v16i1sextv16i16)))), - (VPABSWZ256rr VR256X:$src)>; -} -let Predicates = [HasAVX512, HasVLX] in { - def : Pat<(xor - (bc_v2i64 (avx512_v4i1sextv4i32)), - (bc_v2i64 (add (v4i32 VR128X:$src), (avx512_v4i1sextv4i32)))), - (VPABSDZ128rr VR128X:$src)>; - def : Pat<(xor - (bc_v4i64 (avx512_v8i1sextv8i32)), - (bc_v4i64 (add (v8i32 VR256X:$src), (avx512_v8i1sextv8i32)))), - (VPABSDZ256rr VR256X:$src)>; -} - -let Predicates = [HasAVX512] in { -def : Pat<(xor - (bc_v8i64 (v16i1sextv16i32)), - (bc_v8i64 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), - (VPABSDZrr VR512:$src)>; -def : Pat<(xor - (bc_v8i64 (v8i1sextv8i64)), - (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), - (VPABSQZrr VR512:$src)>; -} -let Predicates = [HasBWI] in { -def : Pat<(xor - (bc_v8i64 (v64i1sextv64i8)), - (bc_v8i64 (add (v64i8 VR512:$src), (v64i1sextv64i8)))), - (VPABSBZrr VR512:$src)>; -def : Pat<(xor - (bc_v8i64 (v32i1sextv32i16)), - (bc_v8i64 (add (v32i16 VR512:$src), (v32i1sextv32i16)))), - (VPABSWZrr VR512:$src)>; -} +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>; multiclass avx512_ctlz opc, string OpcodeStr, Predicate prd>{ Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -359,7 +359,6 @@ def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; -def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>; def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>; def 
X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -5296,84 +5296,24 @@ Sched<[WriteVecALULd]>; } -// Helper fragments to match sext vXi1 to vXiY. -def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)), - VR128:$src))>; -def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>; -def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>; -def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)), - VR256:$src))>; -def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>; -def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>; - -let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, X86Abs, loadv2i64>, VEX, VEX_WIG; - defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, X86Abs, loadv2i64>, VEX, VEX_WIG; -} -let Predicates = [HasAVX, NoVLX] in { - defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, X86Abs, loadv2i64>, VEX, VEX_WIG; -} - let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - def : Pat<(xor - (bc_v2i64 (v16i1sextv16i8)), - (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), - (VPABSBrr VR128:$src)>; - def : Pat<(xor - (bc_v2i64 (v8i1sextv8i16)), - (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), - (VPABSWrr VR128:$src)>; + defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX, VEX_WIG; + defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX, VEX_WIG; } let Predicates = [HasAVX, NoVLX] in { - def : Pat<(xor - (bc_v2i64 (v4i1sextv4i32)), - (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), - (VPABSDrr VR128:$src)>; + defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX, VEX_WIG; } - let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, X86Abs>, 
VEX, VEX_L, VEX_WIG; - defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, X86Abs>, VEX, VEX_L, VEX_WIG; + defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L, VEX_WIG; + defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX] in { - defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, X86Abs>, VEX, VEX_L, VEX_WIG; + defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L, VEX_WIG; } -let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - def : Pat<(xor - (bc_v4i64 (v32i1sextv32i8)), - (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), - (VPABSBYrr VR256:$src)>; - def : Pat<(xor - (bc_v4i64 (v16i1sextv16i16)), - (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), - (VPABSWYrr VR256:$src)>; -} -let Predicates = [HasAVX2, NoVLX] in { - def : Pat<(xor - (bc_v4i64 (v8i1sextv8i32)), - (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), - (VPABSDYrr VR256:$src)>; -} - -defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, X86Abs, memopv2i64>; -defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, X86Abs, memopv2i64>; -defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, X86Abs, memopv2i64>; - -let Predicates = [UseSSSE3] in { - def : Pat<(xor - (bc_v2i64 (v16i1sextv16i8)), - (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), - (PABSBrr VR128:$src)>; - def : Pat<(xor - (bc_v2i64 (v8i1sextv8i16)), - (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), - (PABSWrr VR128:$src)>; - def : Pat<(xor - (bc_v2i64 (v4i1sextv4i32)), - (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), - (PABSDrr VR128:$src)>; -} +defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, memopv2i64>; +defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>; +defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>; //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions Index: lib/Target/X86/X86IntrinsicsInfo.h 
=================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -351,9 +351,9 @@ X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), - X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, ISD::ABS, 0), X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), @@ -831,18 +831,18 @@ X86ISD::FMUL_RND, 0), X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMUL_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, 
INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0), X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0), X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0), @@ -1684,9 +1684,9 @@ X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0), X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0), - X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, ISD::ABS, 0), X86_INTRINSIC_DATA(ssse3_phadd_d_128, 
INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0), Index: test/CodeGen/X86/combine-abs.ll =================================================================== --- test/CodeGen/X86/combine-abs.ll +++ test/CodeGen/X86/combine-abs.ll @@ -1,13 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s -; FIXME: Various missed opportunities to simplify integer absolute instructions. - ; fold (abs c1) -> c2 define <4 x i32> @combine_v4i32_abs_constant() { ; CHECK-LABEL: combine_v4i32_abs_constant: ; CHECK: # BB#0: -; CHECK-NEXT: vpabsd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,3,2147483648] ; CHECK-NEXT: retq %1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> ) ret <4 x i32> %1 @@ -16,7 +14,7 @@ define <16 x i16> @combine_v16i16_abs_constant() { ; CHECK-LABEL: combine_v16i16_abs_constant: ; CHECK: # BB#0: -; CHECK-NEXT: vpabsw {{.*}}(%rip), %ymm0 +; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,1,3,3,7,7,255,255,4096,4096,32767,32767,32768,32768,0] ; CHECK-NEXT: retq %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> ) ret <16 x i16> %1 @@ -27,7 +25,6 @@ ; CHECK-LABEL: combine_v8i16_abs_abs: ; CHECK: # BB#0: ; CHECK-NEXT: vpabsw %xmm0, %xmm0 -; CHECK-NEXT: vpabsw %xmm0, %xmm0 ; CHECK-NEXT: retq %a1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a) %n2 = sub <8 x i16> zeroinitializer, %a1 @@ -40,7 +37,6 @@ ; CHECK-LABEL: combine_v32i8_abs_abs: ; CHECK: # BB#0: ; CHECK-NEXT: vpabsb %ymm0, %ymm0 -; CHECK-NEXT: vpabsb %ymm0, %ymm0 ; CHECK-NEXT: retq %n1 = sub <32 x i8> zeroinitializer, %a %b1 = icmp slt <32 x i8> %a, zeroinitializer Index: test/CodeGen/X86/viabs.ll =================================================================== --- test/CodeGen/X86/viabs.ll +++ test/CodeGen/X86/viabs.ll @@ -147,14 +147,10 @@ ; ; 
AVX1-LABEL: test_abs_gt_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_gt_v8i32: @@ -193,14 +189,10 @@ ; ; AVX1-LABEL: test_abs_ge_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_ge_v8i32: @@ -239,14 +231,10 @@ ; ; AVX1-LABEL: test_abs_gt_v16i16: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsraw $15, %xmm1, %xmm2 -; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsraw $15, %xmm0, %xmm3 -; AVX1-NEXT: vpaddw %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsw %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsw %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_gt_v16i16: @@ -285,15 +273,10 @@ ; ; AVX1-LABEL: test_abs_lt_v32i8: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; 
AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm3 -; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm4 -; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vxorps %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vpabsb %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsb %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_lt_v32i8: @@ -332,14 +315,10 @@ ; ; AVX1-LABEL: test_abs_le_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_le_v8i32: @@ -388,22 +367,14 @@ ; ; AVX1-LABEL: test_abs_le_16i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm4 -; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm4 -; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vpabsd %xmm0, %xmm2 +; 
AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vpabsd %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpabsd %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_le_16i32: @@ -450,9 +421,7 @@ ; ; AVX512-LABEL: test_abs_ge_v2i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpsraq $63, %xmm0, %xmm1 -; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpabsq %xmm0, %xmm0 ; AVX512-NEXT: retq %tmp1neg = sub <2 x i64> zeroinitializer, %a %b = icmp sge <2 x i64> %a, zeroinitializer @@ -499,9 +468,7 @@ ; ; AVX512-LABEL: test_abs_gt_v4i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpsraq $63, %ymm0, %ymm1 -; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpabsq %ymm0, %ymm0 ; AVX512-NEXT: retq %tmp1neg = sub <4 x i64> zeroinitializer, %a %b = icmp sgt <4 x i64> %a, @@ -691,23 +658,14 @@ ; ; AVX1-LABEL: test_abs_lt_v64i8: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4 -; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm5 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm6 -; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vxorps %ymm6, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4 -; AVX1-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm3 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm5 -; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1 +; AVX1-NEXT: vpabsb %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsb %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; 
AVX1-NEXT: vpabsb %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpabsb %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_lt_v64i8: @@ -763,22 +721,14 @@ ; ; AVX1-LABEL: test_abs_gt_v32i16: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3 -; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm0, %xmm4 -; AVX1-NEXT: vpaddw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3 -; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm1, %xmm4 -; AVX1-NEXT: vpaddw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vpabsw %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsw %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vpabsw %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpabsw %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_gt_v32i16: