Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -10775,6 +10775,35 @@
 This function returns the same values as the libm ``round``
 functions would, and handles error conditions in the same way.
 
+'``llvm.abs.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.abs`` on any
+integer type or vector of integer type.
+
+::
+
+      declare i16 @llvm.abs.i16(i16 <src>)
+      declare i32 @llvm.abs.i32(i32 <src>)
+      declare i64 @llvm.abs.i64(i64 <src>)
+
+Overview:
+"""""""""
+
+The '``llvm.abs``' family of intrinsics returns the unsigned absolute
+value of a signed integer value.
+
+Semantics:
+""""""""""
+
+The '``llvm.abs.iN``' intrinsic interprets its argument as a signed ``iN``
+value and returns its absolute value as an unsigned ``iN`` value.
+The absolute value of the minimum signed value is that value itself, i.e.
+the bit pattern is unchanged (e.g. abs(i16 -32768 (0x8000)) returns 0x8000,
+which is +32768 when read as unsigned).
+
 Bit Manipulation Intrinsics
 ---------------------------
Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -332,6 +332,9 @@
     /// Bitwise operators - logical and, logical or, logical xor.
     AND, OR, XOR,
 
+    /// Signed integer absolute.
+    ABS,
+
     /// Shift and rotation operations.  After legalization, the type of the
     /// shift amount is known to be TLI.getShiftAmountTy().  Before legalization
     /// the shift amount can be any type, but care must be taken to ensure it is
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -443,6 +443,7 @@
 
 // None of these intrinsics accesses memory at all.
 let IntrProperties = [IntrNoMem] in {
+  def int_abs: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
Index: include/llvm/Target/TargetSelectionDAG.td
===================================================================
--- include/llvm/Target/TargetSelectionDAG.td
+++ include/llvm/Target/TargetSelectionDAG.td
@@ -406,6 +406,7 @@
                         [SDNPCommutative, SDNPAssociative]>;
 def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
 
+def abs        : SDNode<"ISD::ABS"        , SDTIntUnaryOp>;
 def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
 def bswap      : SDNode<"ISD::BSWAP"      , SDTIntUnaryOp>;
 def ctlz       : SDNode<"ISD::CTLZ"       , SDTIntUnaryOp>;
Index: lib/Analysis/ConstantFolding.cpp
===================================================================
--- lib/Analysis/ConstantFolding.cpp
+++ lib/Analysis/ConstantFolding.cpp
@@ -1331,6 +1331,7 @@
   case Intrinsic::nearbyint:
   case Intrinsic::pow:
   case Intrinsic::powi:
+  case Intrinsic::abs:
   case Intrinsic::bswap:
   case Intrinsic::ctpop:
   case Intrinsic::ctlz:
@@ -1681,6 +1682,8 @@
     if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
       switch (IntrinsicID) {
+      case Intrinsic::abs:
+        return ConstantInt::get(Ty->getContext(), Op->getValue().abs());
       case Intrinsic::bswap:
         return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
       case Intrinsic::ctpop:
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -258,6 +258,7 @@
     SDValue visitSRA(SDNode *N);
     SDValue visitSRL(SDNode *N);
     SDValue visitRotate(SDNode *N);
+    SDValue visitABS(SDNode *N);
     SDValue visitBSWAP(SDNode *N);
     SDValue visitBITREVERSE(SDNode *N);
     SDValue visitCTLZ(SDNode *N);
@@ -1411,6 +1412,7 @@
   case ISD::SRL:                return visitSRL(N);
   case ISD::ROTR:
   case ISD::ROTL:               return visitRotate(N);
+  case ISD::ABS:                return visitABS(N);
   case ISD::BSWAP:              return visitBSWAP(N);
   case ISD::BITREVERSE:         return visitBITREVERSE(N);
   case ISD::CTLZ:               return visitCTLZ(N);
@@ -4355,6 +4357,17 @@
                                          N01C->getAPIntValue(), DL, VT));
     }
   }
+
+  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
+  unsigned OpSizeInBits = VT.getScalarSizeInBits();
+  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
+      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
+      TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+    if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+      if (C->getAPIntValue() == (OpSizeInBits - 1))
+        return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+  }
+
   // fold (xor x, x) -> 0
   if (N0 == N1)
     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
@@ -5084,6 +5097,22 @@
   return SDValue();
 }
 
+SDValue DAGCombiner::visitABS(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  // fold (abs c1) -> c2
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+    return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+  // fold (abs (abs x)) -> (abs x)
+  if (N0.getOpcode() == ISD::ABS)
+    return N0;
+  // fold (abs x) -> x iff not-negative
+  if (DAG.SignBitIsZero(N0))
+    return N0;
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
=================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -147,6 +147,7 @@ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl); + SDValue ExpandABS(SDValue Op, const SDLoc &dl); SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl); @@ -2533,6 +2534,21 @@ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } +/// Legalize an integer absolute operation. +SDValue SelectionDAGLegalize::ExpandABS(SDValue Op, const SDLoc &dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + + // Branchless integer absolute. + // sign = x sra>> (scalarsizeinbits - 1) + // abs(x) = (x + sign) ^ sign. + SDLoc DL(Op); + SDValue Shift = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, SHVT); + SDValue Mask = DAG.getNode(ISD::SRA, DL, VT, Op, Shift); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, Op, Mask); + return DAG.getNode(ISD::XOR, DL, VT, Add, Mask); +} + /// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); @@ -2777,6 +2793,9 @@ Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; + case ISD::ABS: + Results.push_back(ExpandABS(Node->getOperand(0), dl)); + break; case ISD::BITREVERSE: Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); break; Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -123,6 +123,8 @@ case ISD::SUB: case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::ABS: Res = PromoteIntRes_SExtIntUnaryOp(N); break; + case ISD::SDIV: case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; @@ -647,6 +649,12 @@ LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntUnaryOp(SDNode *N) { + // Sign extend the input. + SDValue Src = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), Src.getValueType(), Src); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { // Sign extend the input. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); @@ -1313,6 +1321,7 @@ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break; case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; @@ -1866,6 +1875,26 @@ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } +void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + + // abs(HiLo) -> (Hi < 0 ? 
-HiLo : HiLo) + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, + DAG.getConstant(0, dl, VT), N0); + SDValue NegLo, NegHi; + SplitInteger(Neg, NegLo, NegHi); + + GetExpandedInteger(N0, Lo, Hi); + EVT NVT = Lo.getValueType(); + SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), + DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT); + Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo); + Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -271,6 +271,7 @@ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SExtIntUnaryOp(SDNode *N); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); SDValue PromoteIntRes_SRA(SDNode *N); SDValue PromoteIntRes_SRL(SDNode *N); @@ -345,6 +346,7 @@ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ABS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -105,6 +105,7 @@ SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); + SDValue ExpandABS(SDValue Op); SDValue ExpandBITREVERSE(SDValue Op); SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); @@ -276,6 +277,7 @@ case ISD::AND: case ISD::OR: case ISD::XOR: + case ISD::ABS: case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -691,6 +693,8 @@ return ExpandFNEG(Op); case ISD::SETCC: return UnrollVSETCC(Op); + case ISD::ABS: + return ExpandABS(Op); case ISD::BITREVERSE: return ExpandBITREVERSE(Op); case ISD::CTLZ_ZERO_UNDEF: @@ -880,6 +884,20 @@ return DAG.getNode(ISD::BITCAST, DL, VT, Op); } +SDValue VectorLegalizer::ExpandABS(SDValue Op) { + EVT VT = Op.getValueType(); + + // If we have the appropriate vector bit operations, it is better to use them + // than unrolling and expanding each component. + if (!TLI.isOperationLegalOrCustom(ISD::ADD, VT) || + !TLI.isOperationLegalOrCustom(ISD::SRA, VT) || + !TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) + return DAG.UnrollVectorOp(Op.getNode()); + + // Let LegalizeDAG handle this later. 
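+  // Returning the node unchanged keeps ISD::ABS in the DAG, so
+  // SelectionDAGLegalize::ExpandABS can emit the whole-vector
+  // SRA/ADD/XOR sequence checked for above instead of scalarizing.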
+ return Op; +} + SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { EVT VT = Op.getValueType(); Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -66,6 +66,7 @@ case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::ABS: case ISD::ANY_EXTEND: case ISD::BITREVERSE: case ISD::BSWAP: @@ -619,6 +620,7 @@ SplitVecRes_ExtVecInRegOp(N, Lo, Hi); break; + case ISD::ABS: case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CONVERT_RNDSAT: @@ -2134,6 +2136,7 @@ Res = WidenVecRes_Convert(N); break; + case ISD::ABS: case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3052,6 +3052,9 @@ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); break; + case ISD::ABS: + return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), C->isOpaque()); @@ -3149,6 +3152,7 @@ case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: + case ISD::ABS: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: @@ -3266,6 +3270,14 @@ if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); break; + case ISD::ABS: + assert(VT.isInteger() && VT == Operand.getValueType() && + "Invalid ABS!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + if (VT.getScalarType() == MVT::i1) + return Operand; + break; case ISD::BSWAP: assert(VT.isInteger() && VT == Operand.getValueType() && "Invalid BSWAP!"); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5335,6 +5335,11 @@ DAG.setRoot(Res.getValue(1)); return nullptr; } + case Intrinsic::abs: + setValue(&I, DAG.getNode(ISD::ABS, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return nullptr; case Intrinsic::bitreverse: setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, getValue(I.getArgOperand(0)).getValueType(), Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -315,6 +315,7 @@ case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; // Bit manipulation + case ISD::ABS: return "abs"; case ISD::BITREVERSE: return "bitreverse"; case ISD::BSWAP: return "bswap"; case ISD::CTPOP: return "ctpop"; Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -878,6 +878,7 @@ setOperationAction(ISD::SMAX, VT, Expand); setOperationAction(ISD::UMIN, VT, Expand); setOperationAction(ISD::UMAX, VT, Expand); + setOperationAction(ISD::ABS, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); Index: lib/Target/X86/X86ISelLowering.h 
=================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -238,9 +238,6 @@ FHADD, FHSUB, - // Integer absolute value - ABS, - // Detect Conflicts Within a Vector CONFLICT, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -886,6 +886,9 @@ } if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) { + setOperationAction(ISD::ABS, MVT::v16i8, Legal); + setOperationAction(ISD::ABS, MVT::v8i16, Legal); + setOperationAction(ISD::ABS, MVT::v4i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); @@ -1063,6 +1066,7 @@ setOperationAction(ISD::MULHS, MVT::v32i8, Custom); for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { + setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom); @@ -1262,6 +1266,8 @@ } } if (Subtarget.hasVLX()) { + setOperationAction(ISD::ABS, MVT::v4i64, Legal); + setOperationAction(ISD::ABS, MVT::v2i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); @@ -1360,6 +1366,7 @@ setOperationAction(ISD::MUL, MVT::v16i32, Legal); for (auto VT : { MVT::v16i32, MVT::v8i64 }) { + setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); @@ -1539,6 +1546,7 @@ for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); @@ -19748,6 +19756,25 @@ return Lower256IntArith(Op, DAG); } +static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) { + assert(Op.getSimpleValueType().is256BitVector() && + Op.getSimpleValueType().isInteger() && + "Only handle AVX 256-bit vector integer operation"); + MVT VT = Op.getSimpleValueType(); + unsigned NumElems = VT.getVectorNumElements(); + + SDLoc dl(Op); + SDValue Src = Op.getOperand(0); + SDValue Lo = extract128BitVector(Src, 0, DAG, dl); + SDValue Hi = extract128BitVector(Src, NumElems / 2, DAG, dl); + + MVT EltVT = VT.getVectorElementType(); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + DAG.getNode(Op.getOpcode(), dl, NewVT, Lo), + DAG.getNode(Op.getOpcode(), dl, NewVT, Hi)); +} + static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && @@ -22251,6 +22278,7 @@ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); + case ISD::ABS: return LowerABS(Op, DAG); case ISD::SMAX: case ISD::SMIN: case ISD::UMAX: @@ -22648,7 +22676,6 @@ case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; - case X86ISD::ABS: return "X86ISD::ABS"; case X86ISD::CONFLICT: 
return "X86ISD::CONFLICT"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND"; Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -8310,57 +8310,7 @@ HasBWI>; } -defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>; - -let Predicates = [HasBWI, HasVLX] in { - def : Pat<(xor - (bc_v2i64 (v16i1sextv16i8)), - (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), - (VPABSBZ128rr VR128:$src)>; - def : Pat<(xor - (bc_v2i64 (v8i1sextv8i16)), - (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), - (VPABSWZ128rr VR128:$src)>; - def : Pat<(xor - (bc_v4i64 (v32i1sextv32i8)), - (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), - (VPABSBZ256rr VR256:$src)>; - def : Pat<(xor - (bc_v4i64 (v16i1sextv16i16)), - (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), - (VPABSWZ256rr VR256:$src)>; -} -let Predicates = [HasAVX512, HasVLX] in { - def : Pat<(xor - (bc_v2i64 (v4i1sextv4i32)), - (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), - (VPABSDZ128rr VR128:$src)>; - def : Pat<(xor - (bc_v4i64 (v8i1sextv8i32)), - (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), - (VPABSDZ256rr VR256:$src)>; -} - -let Predicates = [HasAVX512] in { -def : Pat<(xor - (bc_v8i64 (v16i1sextv16i32)), - (bc_v8i64 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), - (VPABSDZrr VR512:$src)>; -def : Pat<(xor - (bc_v8i64 (v8i1sextv8i64)), - (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), - (VPABSQZrr VR512:$src)>; -} -let Predicates = [HasBWI] in { -def : Pat<(xor - (bc_v8i64 (v64i1sextv64i8)), - (bc_v8i64 (add (v64i8 VR512:$src), (v64i1sextv64i8)))), - (VPABSBZrr VR512:$src)>; -def : Pat<(xor - (bc_v8i64 (v32i1sextv32i16)), - (bc_v8i64 (add (v32i16 VR512:$src), (v32i1sextv32i16)))), - (VPABSWZrr VR512:$src)>; -} +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>; multiclass avx512_ctlz opc, string OpcodeStr, Predicate prd>{ Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -352,7 +352,6 @@ def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; -def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>; def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -5330,7 +5330,6 @@ // SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// - /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm opc, string OpcodeStr, ValueType vt, SDNode OpNode, PatFrag ld_frag> { @@ -5365,84 +5364,25 @@ Sched<[WriteVecALULd]>; } -// Helper fragments to match sext vXi1 to vXiY. 
-def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)), - VR128:$src))>; -def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>; -def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>; -def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)), - VR256:$src))>; -def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>; -def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>; - -let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, X86Abs, loadv2i64>, VEX; - defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, X86Abs, loadv2i64>, VEX; -} -let Predicates = [HasAVX, NoVLX] in { - defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, X86Abs, loadv2i64>, VEX; -} - let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - def : Pat<(xor - (bc_v2i64 (v16i1sextv16i8)), - (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), - (VPABSBrr VR128:$src)>; - def : Pat<(xor - (bc_v2i64 (v8i1sextv8i16)), - (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), - (VPABSWrr VR128:$src)>; + defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX; + defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX; } let Predicates = [HasAVX, NoVLX] in { - def : Pat<(xor - (bc_v2i64 (v4i1sextv4i32)), - (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), - (VPABSDrr VR128:$src)>; + defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, X86Abs>, VEX, VEX_L; - defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, X86Abs>, VEX, VEX_L; + defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L; + defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L; } let Predicates = [HasAVX2, NoVLX] in { - defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, X86Abs>, VEX, VEX_L; + defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L; } -let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - def : Pat<(xor - (bc_v4i64 (v32i1sextv32i8)), - (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), - (VPABSBYrr VR256:$src)>; - def : Pat<(xor - (bc_v4i64 (v16i1sextv16i16)), - (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), - (VPABSWYrr VR256:$src)>; -} -let Predicates = [HasAVX2, NoVLX] in { - def : Pat<(xor - (bc_v4i64 (v8i1sextv8i32)), - (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), - (VPABSDYrr VR256:$src)>; -} - -defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, X86Abs, memopv2i64>; -defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, X86Abs, memopv2i64>; -defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, X86Abs, memopv2i64>; - -let Predicates = [UseSSSE3] in { - def : Pat<(xor - (bc_v2i64 (v16i1sextv16i8)), - (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), - (PABSBrr VR128:$src)>; - def : Pat<(xor - (bc_v2i64 (v8i1sextv8i16)), - (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), - (PABSWrr VR128:$src)>; - def : Pat<(xor - (bc_v2i64 (v4i1sextv4i32)), - (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), - (PABSDrr VR128:$src)>; -} +defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, memopv2i64>; +defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>; +defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>; //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions Index: lib/Target/X86/X86IntrinsicsInfo.h 
=================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -275,9 +275,9 @@ X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), - X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, ISD::ABS, 0), X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), @@ -803,18 +803,18 @@ X86ISD::FMUL_RND, 0), X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMUL_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), @@ -1690,9 +1690,9 @@ X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0), X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0), - 
X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, X86ISD::ABS, 0), - X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, ISD::ABS, 0), + X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, ISD::ABS, 0), X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0), Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1441,6 +1441,22 @@ break; } + case Intrinsic::abs: { + Type *Ty = II->getType(); + Value *IIOperand = II->getArgOperand(0); + + // abs(i1 x) -> x + if (Ty->getScalarType()->isIntegerTy(1)) + return replaceInstUsesWith(CI, IIOperand); + // abs(abs(x)) -> abs(x) + if (match(IIOperand, m_Intrinsic())) + return replaceInstUsesWith(CI, IIOperand); + // fold (abs x) -> x iff not-negative + if (isKnownNonNegative(IIOperand, getDataLayout())) + return replaceInstUsesWith(CI, IIOperand); + break; + } + case Intrinsic::bitreverse: { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; Index: test/CodeGen/X86/combine-abs.ll =================================================================== --- test/CodeGen/X86/combine-abs.ll +++ test/CodeGen/X86/combine-abs.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX + +; fold (abs c1) -> c +define <4 x i32> @combine_fold_abs_v4i32() { +; SSE-LABEL: combine_fold_abs_v4i32: +; SSE: # BB#0: +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,3,0,2147483648] +; SSE-NEXT: retq +; +; AVX-LABEL: combine_fold_abs_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,3,0,2147483648] +; AVX-NEXT: retq + %1 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> ) + ret <4 x i32> %1 +} + +; fold (abs (abs x)) -> (abs x) +define <4 x i32> @combine_abs_abs_v4i32(<4 x i32> %x) { +; SSE2-LABEL: combine_abs_abs_v4i32: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: paddd %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: combine_abs_abs_v4i32: +; SSE41: # BB#0: +; SSE41-NEXT: pabsd %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: combine_abs_abs_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vpabsd %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x) + %2 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %1) + ret <4 x i32> %2 +} + +; fold (abs x) -> x iff not-negative +define <4 x i32> @combine_abs_and_v4i32(<4 x i32> %x) { +; SSE-LABEL: combine_abs_and_v4i32: +; SSE: # BB#0: +; SSE-NEXT: andps {{.*}}(%rip), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: combine_abs_and_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = and <4 x i32> %x, + %2 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %1) + ret <4 x i32> %2 +} + +declare <4 x i32> 
@llvm.abs.v4i32(<4 x i32>) readnone Index: test/CodeGen/X86/legalize-abs.ll =================================================================== --- test/CodeGen/X86/legalize-abs.ll +++ test/CodeGen/X86/legalize-abs.ll @@ -0,0 +1,279 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-SSE --check-prefix=X32-SSE2 +; RUN: llc < %s -mtriple=i686-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=X32 --check-prefix=X32-SSE --check-prefix=X32-SSSE3 +; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX --check-prefix=X32-AVX1 +; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX --check-prefix=X32-AVX2 +; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX --check-prefix=X32-AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSSE3 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX512 + +declare i1 @llvm.abs.i1(i1) readnone +declare i27 @llvm.abs.i27(i27) readnone +declare i64 @llvm.abs.i64(i64) readnone +declare i128 @llvm.abs.i128(i128) readnone +declare <4 x i31> @llvm.abs.v4i31(<4 x i31>) readnone +declare <2 x i33> @llvm.abs.v2i33(<2 x i33>) readnone + +define i1 @test_abs_i1(i1 %a) nounwind { +; X32-LABEL: test_abs_i1: +; X32: # BB#0: +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: retl +; +; X64-LABEL: test_abs_i1: +; X64: # BB#0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: retq + %b = call i1 @llvm.abs.i1(i1 %a) + ret i1 %b +} + +define i27 @test_abs_i27(i27 %a) nounwind { +; X32-LABEL: test_abs_i27: +; X32: # BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: shll $5, %ecx +; X32-NEXT: sarl $5, %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: negl %eax +; X32-NEXT: cmovll %ecx, %eax +; X32-NEXT: retl +; +; X64-LABEL: test_abs_i27: +; X64: # BB#0: +; X64-NEXT: shll $5, %edi +; X64-NEXT: sarl $5, %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: cmovll %edi, %eax +; X64-NEXT: retq + %b = call i27 @llvm.abs.i27(i27 %a) + ret i27 %b +} + +define i64 @test_abs_i64(i64 %a) nounwind { +; X32-LABEL: test_abs_i64: +; X32: # BB#0: +; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: negl %eax +; X32-NEXT: sbbl %esi, %edx +; X32-NEXT: testl %esi, %esi +; X32-NEXT: cmovnsl %ecx, %eax +; X32-NEXT: cmovnsl %esi, %edx +; X32-NEXT: popl %esi +; X32-NEXT: retl +; +; X64-LABEL: test_abs_i64: +; X64: # BB#0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: negq %rax +; X64-NEXT: cmovlq %rdi, %rax +; X64-NEXT: retq + %b = call i64 @llvm.abs.i64(i64 %a) + ret i64 %b +} + +define i128 @test_abs_i128(i128 %a) nounwind { +; X32-LABEL: test_abs_i128: +; X32: # BB#0: +; X32-NEXT: pushl %ebp +; X32-NEXT: 
pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: xorl %esi, %esi +; X32-NEXT: negl %edi +; X32-NEXT: movl $0, %ebx +; X32-NEXT: sbbl %edx, %ebx +; X32-NEXT: movl $0, %ebp +; X32-NEXT: sbbl %ecx, %ebp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: sbbl %eax, %esi +; X32-NEXT: testl %eax, %eax +; X32-NEXT: cmovnsl %eax, %esi +; X32-NEXT: cmovnsl %ecx, %ebp +; X32-NEXT: cmovnsl %edx, %ebx +; X32-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %edi, (%eax) +; X32-NEXT: movl %ebx, 4(%eax) +; X32-NEXT: movl %ebp, 8(%eax) +; X32-NEXT: movl %esi, 12(%eax) +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp +; X32-NEXT: retl $4 +; +; X64-LABEL: test_abs_i128: +; X64: # BB#0: +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: negq %rax +; X64-NEXT: sbbq %rsi, %rdx +; X64-NEXT: testq %rsi, %rsi +; X64-NEXT: cmovnsq %rdi, %rax +; X64-NEXT: cmovnsq %rsi, %rdx +; X64-NEXT: retq + %b = call i128 @llvm.abs.i128(i128 %a) + ret i128 %b +} + +define <4 x i31> @test_abs_v4i31(<4 x i31> %a) nounwind { +; X32-SSE2-LABEL: test_abs_v4i31: +; X32-SSE2: # BB#0: +; X32-SSE2-NEXT: pslld $1, %xmm0 +; X32-SSE2-NEXT: psrad $1, %xmm0 +; X32-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE2-NEXT: psrad $31, %xmm1 +; X32-SSE2-NEXT: paddd %xmm1, %xmm0 +; X32-SSE2-NEXT: pxor %xmm1, %xmm0 +; X32-SSE2-NEXT: retl +; +; X32-SSSE3-LABEL: test_abs_v4i31: +; X32-SSSE3: # BB#0: +; X32-SSSE3-NEXT: pslld $1, %xmm0 +; X32-SSSE3-NEXT: psrad $1, %xmm0 +; X32-SSSE3-NEXT: pabsd %xmm0, %xmm0 +; X32-SSSE3-NEXT: retl +; +; X32-AVX-LABEL: test_abs_v4i31: +; X32-AVX: # BB#0: +; X32-AVX-NEXT: vpslld $1, %xmm0, %xmm0 +; X32-AVX-NEXT: vpsrad $1, %xmm0, %xmm0 +; X32-AVX-NEXT: vpabsd %xmm0, %xmm0 +; X32-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_abs_v4i31: +; X64-SSE2: # BB#0: +; X64-SSE2-NEXT: pslld $1, %xmm0 +; X64-SSE2-NEXT: psrad $1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: psrad $31, %xmm1 +; X64-SSE2-NEXT: paddd %xmm1, %xmm0 +; X64-SSE2-NEXT: pxor %xmm1, %xmm0 +; X64-SSE2-NEXT: retq +; +; X64-SSSE3-LABEL: test_abs_v4i31: +; X64-SSSE3: # BB#0: +; X64-SSSE3-NEXT: pslld $1, %xmm0 +; X64-SSSE3-NEXT: psrad $1, %xmm0 +; X64-SSSE3-NEXT: pabsd %xmm0, %xmm0 +; X64-SSSE3-NEXT: retq +; +; X64-AVX-LABEL: test_abs_v4i31: +; X64-AVX: # BB#0: +; X64-AVX-NEXT: vpslld $1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrad $1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpabsd %xmm0, %xmm0 +; X64-AVX-NEXT: retq + %b = call <4 x i31> @llvm.abs.v4i31(<4 x i31> %a) + ret <4 x i31> %b +} + +define <2 x i33> @test_abs_v2i33(<2 x i33> %a) nounwind { +; X32-SSE-LABEL: test_abs_v2i33: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: psllq $31, %xmm0 +; X32-SSE-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE-NEXT: psrad $31, %xmm1 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; X32-SSE-NEXT: psrlq $31, %xmm0 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-SSE-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE-NEXT: psrad $31, %xmm1 +; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X32-SSE-NEXT: paddq %xmm1, %xmm0 +; X32-SSE-NEXT: pxor %xmm1, %xmm0 +; X32-SSE-NEXT: retl +; +; X32-AVX1-LABEL: test_abs_v2i33: +; X32-AVX1: # BB#0: +; X32-AVX1-NEXT: vpsllq $31, %xmm0, %xmm0 +; X32-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; X32-AVX1-NEXT: vpsrlq $31, %xmm0, %xmm0 +; 
X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; X32-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; X32-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X32-AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; X32-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X32-AVX1-NEXT: retl +; +; X32-AVX2-LABEL: test_abs_v2i33: +; X32-AVX2: # BB#0: +; X32-AVX2-NEXT: vpsllq $31, %xmm0, %xmm0 +; X32-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1 +; X32-AVX2-NEXT: vpsrlq $31, %xmm0, %xmm0 +; X32-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; X32-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1 +; X32-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X32-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; X32-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X32-AVX2-NEXT: retl +; +; X32-AVX512-LABEL: test_abs_v2i33: +; X32-AVX512: # BB#0: +; X32-AVX512-NEXT: vpsllq $31, %xmm0, %xmm0 +; X32-AVX512-NEXT: vpsraq $31, %xmm0, %xmm0 +; X32-AVX512-NEXT: vpabsq %xmm0, %xmm0 +; X32-AVX512-NEXT: retl +; +; X64-SSE-LABEL: test_abs_v2i33: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: psllq $31, %xmm0 +; X64-SSE-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE-NEXT: psrad $31, %xmm1 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; X64-SSE-NEXT: psrlq $31, %xmm0 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-SSE-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE-NEXT: psrad $31, %xmm1 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X64-SSE-NEXT: paddq %xmm1, %xmm0 +; X64-SSE-NEXT: pxor %xmm1, %xmm0 +; X64-SSE-NEXT: retq +; +; X64-AVX1-LABEL: test_abs_v2i33: +; X64-AVX1: # BB#0: +; X64-AVX1-NEXT: vpsllq $31, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; X64-AVX1-NEXT: vpsrlq $31, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; X64-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X64-AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: test_abs_v2i33: +; X64-AVX2: # BB#0: +; X64-AVX2-NEXT: vpsllq $31, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpsrlq $31, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; X64-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X64-AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: test_abs_v2i33: +; X64-AVX512: # BB#0: +; X64-AVX512-NEXT: vpsllq $31, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpsraq $31, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpabsq %xmm0, %xmm0 +; X64-AVX512-NEXT: retq + %b = call <2 x i33> @llvm.abs.v2i33(<2 x i33> %a) + ret <2 x i33> %b +} Index: test/CodeGen/X86/viabs.ll =================================================================== --- test/CodeGen/X86/viabs.ll +++ test/CodeGen/X86/viabs.ll @@ -147,14 +147,10 @@ ; ; AVX1-LABEL: test_abs_gt_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, 
%xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_gt_v8i32: @@ -193,14 +189,10 @@ ; ; AVX1-LABEL: test_abs_ge_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_ge_v8i32: @@ -239,14 +231,10 @@ ; ; AVX1-LABEL: test_abs_gt_v16i16: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsraw $15, %xmm1, %xmm2 -; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsraw $15, %xmm0, %xmm3 -; AVX1-NEXT: vpaddw %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsw %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsw %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_gt_v16i16: @@ -285,15 +273,10 @@ ; ; AVX1-LABEL: test_abs_lt_v32i8: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm3 -; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm4 -; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vxorps %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vpabsb %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsb %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_lt_v32i8: @@ -332,14 +315,10 @@ ; ; AVX1-LABEL: test_abs_le_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm1 -; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_le_v8i32: @@ -388,22 +367,14 @@ ; ; AVX1-LABEL: test_abs_le_16i32: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm4 -; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm4 -; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vpabsd %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsd %xmm0, %xmm0 +; AVX1-NEXT: 
vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vpabsd %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpabsd %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_le_16i32: @@ -450,9 +421,7 @@ ; ; AVX512-LABEL: test_abs_ge_v2i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpsraq $63, %xmm0, %xmm1 -; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpxorq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpabsq %xmm0, %xmm0 ; AVX512-NEXT: retq %tmp1neg = sub <2 x i64> zeroinitializer, %a %b = icmp sge <2 x i64> %a, zeroinitializer @@ -499,9 +468,7 @@ ; ; AVX512-LABEL: test_abs_gt_v4i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpsraq $63, %ymm0, %ymm1 -; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpxorq %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpabsq %ymm0, %ymm0 ; AVX512-NEXT: retq %tmp1neg = sub <4 x i64> zeroinitializer, %a %b = icmp sgt <4 x i64> %a, @@ -691,23 +658,14 @@ ; ; AVX1-LABEL: test_abs_lt_v64i8: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4 -; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm5 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm6 -; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vxorps %ymm6, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm4 -; AVX1-NEXT: vpcmpgtb %xmm1, %xmm3, %xmm3 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm5 -; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1 +; AVX1-NEXT: vpabsb %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsb %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vpabsb %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpabsb %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_lt_v64i8: @@ -763,22 +721,14 @@ ; ; AVX1-LABEL: test_abs_gt_v32i16: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3 -; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm0, %xmm4 -; AVX1-NEXT: vpaddw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3 -; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsraw $15, %xmm1, %xmm4 -; AVX1-NEXT: vpaddw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm2 -; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vpabsw %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpabsw %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vpabsw %xmm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpabsw %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_abs_gt_v32i16: @@ -802,3 +752,112 @@ %abs = select <32 x i1> %b, <32 x i16> %a, <32 x i16> %tmp1neg ret <32 x i16> %abs } + +; +; ISD::ABS Tests +; + +declare <2 x i64> @llvm.abs.v2i64(<2 x i64>) readnone +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>) readnone +declare <8 x i16> @llvm.abs.v8i16(<8 x 
i16>) readnone +declare <16 x i8> @llvm.abs.v16i8(<16 x i8>) readnone + +define <2 x i64> @test_abs_v2i64(<2 x i64> %a) nounwind { +; SSE-LABEL: test_abs_v2i64: +; SSE: # BB#0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrad $31, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE-NEXT: paddq %xmm1, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: test_abs_v2i64: +; AVX1: # BB#0: +; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_abs_v2i64: +; AVX2: # BB#0: +; AVX2-NEXT: vpsrad $31, %xmm0, %xmm1 +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_abs_v2i64: +; AVX512: # BB#0: +; AVX512-NEXT: vpabsq %xmm0, %xmm0 +; AVX512-NEXT: retq + %b = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a) + ret <2 x i64> %b +} + +define <4 x i32> @test_abs_v4i32(<4 x i32> %a) nounwind { +; SSE2-LABEL: test_abs_v4i32: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: paddd %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: test_abs_v4i32: +; SSSE3: # BB#0: +; SSSE3-NEXT: pabsd %xmm0, %xmm0 +; SSSE3-NEXT: retq +; +; AVX-LABEL: test_abs_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vpabsd %xmm0, %xmm0 +; AVX-NEXT: retq + %b = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a) + ret <4 x i32> %b +} + +define <8 x i16> @test_abs_v8i16(<8 x i16> %a) nounwind { +; SSE2-LABEL: test_abs_v8i16: +; SSE2: # BB#0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psraw $15, %xmm1 +; SSE2-NEXT: paddw %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: test_abs_v8i16: +; SSSE3: # BB#0: +; SSSE3-NEXT: pabsw %xmm0, %xmm0 +; SSSE3-NEXT: retq +; +; AVX-LABEL: test_abs_v8i16: +; AVX: # BB#0: +; AVX-NEXT: vpabsw %xmm0, %xmm0 +; AVX-NEXT: retq + %b = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a) + ret <8 x i16> %b +} + +define <16 x i8> @test_abs_v16i8(<16 x i8> %a) nounwind { +; SSE2-LABEL: test_abs_v16i8: +; SSE2: # BB#0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; SSE2-NEXT: paddb %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: test_abs_v16i8: +; SSSE3: # BB#0: +; SSSE3-NEXT: pabsb %xmm0, %xmm0 +; SSSE3-NEXT: retq +; +; AVX-LABEL: test_abs_v16i8: +; AVX: # BB#0: +; AVX-NEXT: vpabsb %xmm0, %xmm0 +; AVX-NEXT: retq + %b = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a) + ret <16 x i8> %b +} Index: test/Transforms/InstCombine/abs.ll =================================================================== --- test/Transforms/InstCombine/abs.ll +++ test/Transforms/InstCombine/abs.ll @@ -0,0 +1,160 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +declare i1 @llvm.abs.i1(i1) readnone +declare i8 @llvm.abs.i8(i8) readnone +declare i16 @llvm.abs.i16(i16) readnone +declare i32 @llvm.abs.i32(i32) readnone +declare i64 @llvm.abs.i64(i64) readnone +declare i128 @llvm.abs.i128(i128) readnone + +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>) readnone +declare <2 x i64> @llvm.abs.v2i64(<2 x i64>) readnone + +; +; Folds +; + +define i1 @fold_abs_abs_i1(i1 %a) { +; 
CHECK-LABEL: @fold_abs_abs_i1( +; CHECK-NEXT: ret i1 %a +; + %1 = call i1 @llvm.abs.i1(i1 %a) + %2 = call i1 @llvm.abs.i1(i1 %1) + ret i1 %2 +} + +define i32 @fold_abs_abs_i32(i32 %a) { +; CHECK-LABEL: @fold_abs_abs_i32( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.abs.i32(i32 %a) +; CHECK-NEXT: ret i32 [[TMP1]] +; + %1 = call i32 @llvm.abs.i32(i32 %a) + %2 = call i32 @llvm.abs.i32(i32 %1) + ret i32 %2 +} + +define <4 x i32> @fold_abs_abs_v4i32(<4 x i32> %a) { +; CHECK-LABEL: @fold_abs_abs_v4i32( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %1 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a) + %2 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %1) + ret <4 x i32> %2 +} + +define i16 @fold_abs_mask_i16(i16 %a) { +; CHECK-LABEL: @fold_abs_mask_i16( +; CHECK-NEXT: [[TMP1:%.*]] = and i16 %a, 32765 +; CHECK-NEXT: ret i16 [[TMP1]] +; + %1 = and i16 %a, 32765 + %2 = call i16 @llvm.abs.i16(i16 %1) + ret i16 %2 +} + +define <4 x i32> @fold_abs_and_v4i32(<4 x i32> %x) { +; CHECK-LABEL: @fold_abs_and_v4i32( +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> %x, +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %1 = and <4 x i32> %x, + %2 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %1) + ret <4 x i32> %2 +} + +; +; Constant Folding +; + +define i1 @constant_fold_i1_false() { +; CHECK-LABEL: @constant_fold_i1_false( +; CHECK-NEXT: ret i1 false +; + %1 = call i1 @llvm.abs.i1(i1 0) + ret i1 %1 +} + +define i1 @constant_fold_i1_true() { +; CHECK-LABEL: @constant_fold_i1_true( +; CHECK-NEXT: ret i1 true +; + %1 = call i1 @llvm.abs.i1(i1 -1) + ret i1 %1 +} + +define i8 @constant_fold_i8() { +; CHECK-LABEL: @constant_fold_i8( +; CHECK-NEXT: ret i8 3 +; + %1 = call i8 @llvm.abs.i8(i8 -3) + ret i8 %1 +} + +define i8 @constant_fold_i8_min() { +; CHECK-LABEL: @constant_fold_i8_min( +; CHECK-NEXT: ret i8 -128 +; + %1 = call i8 @llvm.abs.i8(i8 -128) + ret i8 %1 +} + +define i16 @constant_fold_i16() { +; CHECK-LABEL: @constant_fold_i16( +; CHECK-NEXT: ret i16 555 +; + %1 = call i16 @llvm.abs.i16(i16 555) + ret i16 %1 +} + +define i32 @constant_fold_i32() { +; CHECK-LABEL: @constant_fold_i32( +; CHECK-NEXT: ret i32 32769 +; + %1 = call i32 @llvm.abs.i32(i32 -32769) + ret i32 %1 +} + +define i64 @constant_fold_i64() { +; CHECK-LABEL: @constant_fold_i64( +; CHECK-NEXT: ret i64 65535 +; + %1 = call i64 @llvm.abs.i64(i64 65535) + ret i64 %1 +} + +define i64 @constant_fold_i64_min() { +; CHECK-LABEL: @constant_fold_i64_min( +; CHECK-NEXT: ret i64 -9223372036854775808 +; + %1 = call i64 @llvm.abs.i64(i64 -9223372036854775808) + ret i64 %1 +} + +define i128 @constant_fold_i128() { +; CHECK-LABEL: @constant_fold_i128( +; CHECK-NEXT: ret i128 36893488147419103232 +; + %1 = shl i128 1, 65 + %2 = call i128 @llvm.abs.i128(i128 %1) + ret i128 %2 +} + +define <4 x i32> @constant_fold_v4i32() { +; CHECK-LABEL: @constant_fold_v4i32( +; CHECK-NEXT: ret <4 x i32> +; + %1 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> ) + ret <4 x i32> %1 +} + +define <2 x i64> @constant_fold_v2i64() { +; CHECK-LABEL: @constant_fold_v2i64( +; CHECK-NEXT: ret <2 x i64> +; + %1 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> ) + ret <2 x i64> %1 +}
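
Illustrative follow-up (not part of this patch): the new fold added to
``DAGCombiner::visitXOR`` rewrites the branchless-abs idiom
``xor (add X, (sra X, bits-1)), (sra X, bits-1)`` into ``ISD::ABS`` whenever
ABS is legal or custom for the type. A sketch of an extra llc test that would
exercise the fold directly from generic IR, rather than through the intrinsic;
the function name and expectations below are hypothetical, assuming the same
RUN lines as viabs.ll::

  define <4 x i32> @combine_sra_add_xor_v4i32(<4 x i32> %x) nounwind {
    ; sign = x >>s 31 ; abs = (x + sign) ^ sign
    %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
    %sum  = add <4 x i32> %x, %sign
    %abs  = xor <4 x i32> %sum, %sign
    ret <4 x i32> %abs
  }

With SSSE3 or AVX enabled, where ISD::ABS is now Legal for v4i32, this should
select a single (v)pabsd; on targets where ABS is not legal or custom the fold
is skipped and the original psrad/paddd/pxor sequence is kept.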