Index: llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -327,7 +327,6 @@
     bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
     void transferMemOperands(SDNode *N, SDNode *Result);
-    MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBit = nullptr);
   };
 } // end anonymous namespace
@@ -4138,51 +4137,6 @@
   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
 }
 
-/// This method returns a node after flipping the MSB of each element
-/// of vector integer type. Additionally, if SignBitVec is non-null,
-/// this method sets a node with one at MSB of all elements
-/// and zero at other bits in SignBitVec.
-MachineSDNode *
-PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) {
-  SDLoc dl(N);
-  EVT VecVT = N.getValueType();
-  if (VecVT == MVT::v4i32) {
-    if (SignBitVec) {
-      SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32);
-      *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT,
-                                           SDValue(ZV, 0));
-    }
-    return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N);
-  }
-  else if (VecVT == MVT::v8i16) {
-    SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32,
-                                        getI32Imm(0x8000, dl));
-    SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32,
-                                            SDValue(Hi, 0),
-                                            getI32Imm(0x8000, dl));
-    SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT,
-                                            SDValue(ScaImm, 0));
-    /*
-    Alternatively, we can do this as follow to use VRF instead of GPR.
-      vspltish 5, 1
-      vspltish 6, 15
-      vslh 5, 6, 5
-    */
-    if (SignBitVec) *SignBitVec = VecImm;
-    return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N,
-                                  SDValue(VecImm, 0));
-  }
-  else if (VecVT == MVT::v16i8) {
-    SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32,
-                                            getI32Imm(0x80, dl));
-    if (SignBitVec) *SignBitVec = VecImm;
-    return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N,
-                                  SDValue(VecImm, 0));
-  }
-  else
-    llvm_unreachable("Unsupported vector data type for flipSignBit");
-}
-
 // Select - Convert the specified operand from a target-independent to a
 // target-specific node if it hasn't already been changed.
 void PPCDAGToDAGISel::Select(SDNode *N) {
@@ -4993,55 +4947,6 @@
       return;
     }
   }
-  case ISD::ABS: {
-    assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector");
-
-    // For vector absolute difference, we use VABSDUW instruction of POWER9.
-    // Since VABSDU instructions are for unsigned integers, we need adjustment
-    // for signed integers.
-    // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000).
-    // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.
-    // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000).
-    EVT VecVT = N->getOperand(0).getValueType();
-    SDNode *AbsOp = nullptr;
-    unsigned AbsOpcode;
-
-    if (VecVT == MVT::v4i32)
-      AbsOpcode = PPC::VABSDUW;
-    else if (VecVT == MVT::v8i16)
-      AbsOpcode = PPC::VABSDUH;
-    else if (VecVT == MVT::v16i8)
-      AbsOpcode = PPC::VABSDUB;
-    else
-      llvm_unreachable("Unsupported vector data type for ISD::ABS");
-
-    // Even for signed integers, we can skip adjustment if all values are
-    // known to be positive (as signed integer) due to zero-extended inputs.
-    if (N->getOperand(0).getOpcode() == ISD::SUB &&
-        N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
-        N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND) {
-      AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
-                                     SDValue(N->getOperand(0)->getOperand(0)),
-                                     SDValue(N->getOperand(0)->getOperand(1)));
-      ReplaceNode(N, AbsOp);
-      return;
-    }
-    if (N->getOperand(0).getOpcode() == ISD::SUB) {
-      SDValue SubVal = N->getOperand(0);
-      SDNode *Op0 = flipSignBit(SubVal->getOperand(0));
-      SDNode *Op1 = flipSignBit(SubVal->getOperand(1));
-      AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
-                                     SDValue(Op0, 0), SDValue(Op1, 0));
-    }
-    else {
-      SDNode *Op1 = nullptr;
-      SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1);
-      AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0),
-                                     SDValue(Op1, 0));
-    }
-    ReplaceNode(N, AbsOp);
-    return;
-  }
   }
 
   SelectCode(N);
Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
@@ -373,6 +373,21 @@
     /// An SDNode for swaps that are not associated with any loads/stores
     /// and thereby have no chain.
     SWAP_NO_CHAIN,
+
+    /// An SDNode for Power9 vector absolute value difference.
+    /// operand #0 vector
+    /// operand #1 vector
+    /// operand #2 constant i32 0 or 1, to indicate whether it needs to patch
+    /// the most significant bit for signed i32.
+    ///
+    /// Power9 VABSD* instructions are designed to support unsigned integer
+    /// vectors (byte/halfword/word); if we want to make use of them for
+    /// signed integer vectors, we have to flip their sign bits first.
+    /// Flipping the sign bit of a byte/halfword integer vector would be
+    /// inefficient, but for a word integer vector we can leverage XVNEGSP
+    /// to do it efficiently, e.g.:
+    /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
+    ///               => VABSDUW((XVNEGSP a), (XVNEGSP b))
+    VABSD,
 
     /// QVFPERM = This corresponds to the QPX qvfperm instruction.
     QVFPERM,
@@ -998,6 +1013,7 @@
     SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -1101,6 +1117,7 @@
     SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
 
     /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
     /// SETCC with integer subtraction when (1) there is a legal way of doing it
Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -251,12 +251,6 @@
     setOperationAction(ISD::UREM, MVT::i64, Expand);
   }
 
-  if (Subtarget.hasP9Vector()) {
-    setOperationAction(ISD::ABS, MVT::v4i32, Legal);
-    setOperationAction(ISD::ABS, MVT::v8i16, Legal);
-    setOperationAction(ISD::ABS, MVT::v16i8, Legal);
-  }
-
   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
@@ -556,6 +550,7 @@
       // add/sub are legal for all supported vector VT's.
       setOperationAction(ISD::ADD, VT, Legal);
       setOperationAction(ISD::SUB, VT, Legal);
+      setOperationAction(ISD::ABS, VT, Custom);
 
       // Vector instructions introduced in P8
       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
@@ -661,6 +656,11 @@
     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
 
+    // Without hasP8Altivec set, v2i64 SMAX isn't available.
+    // But ABS custom lowering requires SMAX support.
+    if (!Subtarget.hasP8Altivec())
+      setOperationAction(ISD::ABS, MVT::v2i64, Expand);
+
     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
@@ -1083,6 +1083,10 @@
     setTargetDAGCombine(ISD::FSQRT);
   }
 
+  if (Subtarget.hasP9Altivec()) {
+    setTargetDAGCombine(ISD::ABS);
+  }
+
   // Darwin long double math library functions have $LDBL128 appended.
   if (Subtarget.isDarwin()) {
     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -1343,6 +1347,7 @@
   case PPCISD::RFEBB: return "PPCISD::RFEBB";
   case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
   case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
+  case PPCISD::VABSD: return "PPCISD::VABSD";
   case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
   case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
   case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
@@ -9003,35 +9008,6 @@
     return DAG.getRegister(PPC::R2, MVT::i32);
   }
 
-  // We are looking for absolute values here.
-  // The idea is to try to fit one of two patterns:
-  // max (a, (0-a)) OR max ((0-a), a)
-  if (Subtarget.hasP9Vector() &&
-      (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw ||
-       IntrinsicID == Intrinsic::ppc_altivec_vmaxsh ||
-       IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) {
-    SDValue V1 = Op.getOperand(1);
-    SDValue V2 = Op.getOperand(2);
-    if (V1.getSimpleValueType() == V2.getSimpleValueType() &&
-        (V1.getSimpleValueType() == MVT::v4i32 ||
-         V1.getSimpleValueType() == MVT::v8i16 ||
-         V1.getSimpleValueType() == MVT::v16i8)) {
-      if ( V1.getOpcode() == ISD::SUB &&
-           ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
-           V1.getOperand(1) == V2 ) {
-        // Generate the abs instruction with the operands
-        return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2);
-      }
-
-      if ( V2.getOpcode() == ISD::SUB &&
-           ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
-           V2.getOperand(1) == V1 ) {
-        // Generate the abs instruction with the operands
-        return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1);
-      }
-    }
-  }
-
   // If this is a lowered altivec predicate compare, CompareOpc is set to the
   // opcode number of the comparison.
   int CompareOpc;
@@ -9572,6 +9548,44 @@
   }
 }
 
+SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+
+  assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
+
+  EVT VT = Op.getValueType();
+  assert(VT.isVector() &&
+         "Only set vector abs as custom, scalar abs shouldn't reach here!");
+  assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+          VT == MVT::v16i8) &&
+         "Unexpected vector element type!");
+  assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
+         "Current subtarget doesn't support smax v2i64!");
+
+  // For vector abs, it can be lowered to:
+  // abs x
+  // ==>
+  // y = -x
+  // smax(x, y)
+
+  SDLoc dl(Op);
+  SDValue X = Op.getOperand(0);
+  SDValue Zero = DAG.getConstant(0, dl, VT);
+  SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
+
+  // SMAX patch https://reviews.llvm.org/D47332
+  // hasn't landed yet, so use the intrinsic here for now.
+  // TODO: Use SMAX directly once the SMAX patch has landed.
+  Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
+  if (VT == MVT::v2i64)
+    BifID = Intrinsic::ppc_altivec_vmaxsd;
+  else if (VT == MVT::v8i16)
+    BifID = Intrinsic::ppc_altivec_vmaxsh;
+  else if (VT == MVT::v16i8)
+    BifID = Intrinsic::ppc_altivec_vmaxsb;
+
+  return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9624,6 +9638,7 @@
   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::MUL: return LowerMUL(Op, DAG);
+  case ISD::ABS: return LowerABS(Op, DAG);
 
   // For counter-based loop handling.
   case ISD::INTRINSIC_W_CHAIN: return SDValue();
@@ -12985,6 +13000,39 @@
       }
     }
   }
+
+  // Combine vmaxsw/h/b(a, a's negation) to abs(a)
+  // Expose the vabsduw/h/b opportunity for downstream combines.
+  if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
+      (IID == Intrinsic::ppc_altivec_vmaxsw ||
+       IID == Intrinsic::ppc_altivec_vmaxsh ||
+       IID == Intrinsic::ppc_altivec_vmaxsb)) {
+    SDValue V1 = N->getOperand(1);
+    SDValue V2 = N->getOperand(2);
+    if ((V1.getSimpleValueType() == MVT::v4i32 ||
+         V1.getSimpleValueType() == MVT::v8i16 ||
+         V1.getSimpleValueType() == MVT::v16i8) &&
+        V1.getSimpleValueType() == V2.getSimpleValueType()) {
+      // (0-a, a)
+      if (V1.getOpcode() == ISD::SUB &&
+          ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
+          V1.getOperand(1) == V2) {
+        return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
+      }
+      // (a, 0-a)
+      if (V2.getOpcode() == ISD::SUB &&
+          ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
+          V2.getOperand(1) == V1) {
+        return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+      }
+      // (x-y, y-x)
+      if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
+          V1.getOperand(0) == V2.getOperand(1) &&
+          V1.getOperand(1) == V2.getOperand(0)) {
+        return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+      }
+    }
+  }
   }
   break;
@@ -13217,6 +13265,8 @@
     }
   case ISD::BUILD_VECTOR:
     return DAGCombineBuildVector(N, DCI);
+  case ISD::ABS:
+    return combineABS(N, DCI);
   }
 
   return SDValue();
@@ -14503,3 +14553,47 @@
   // For non-constant masks, we can always use the record-form and.
   return true;
 }
+
+// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
+// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
+// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
+// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
+// Transform (abs (sub a, b)) to (vabsd a b 1) if a & b are of type v4i32
+SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
+  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
+  assert(Subtarget.hasP9Altivec() &&
+         "Only combine this when P9 altivec supported!");
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+  if (N->getOperand(0).getOpcode() == ISD::SUB) {
+    // Even for signed integers, we can skip the sign-bit adjustment if the
+    // inputs are known to be positive (as signed integers) because they are
+    // zero-extended.
+    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
+    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
+    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
+         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+        (SubOpcd1 == ISD::ZERO_EXTEND ||
+         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
+      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+                         N->getOperand(0)->getOperand(0),
+                         N->getOperand(0)->getOperand(1),
+                         DAG.getTargetConstant(0, dl, MVT::i32));
+    }
+
+    // For type v4i32, it can be optimized with xvnegsp + vabsduw
+    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
+        N->getOperand(0).hasOneUse()) {
+      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+                         N->getOperand(0)->getOperand(0),
+                         N->getOperand(0)->getOperand(1),
+                         DAG.getTargetConstant(1, dl, MVT::i32));
+    }
+  }
+
+  return SDValue();
+}
+
Index: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td
@@ -67,6 +67,10 @@
 def SDTVecConv : SDTypeProfile<1, 2, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>
 ]>;
+def SDTVabsd : SDTypeProfile<1, 3, [
+  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>
+]>;
+
 def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -79,6 +83,7 @@
 def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
 def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
 def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
+def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
 
 multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
                        string asmstr, InstrItinClass itin, Intrinsic Int,
@@ -4017,3 +4022,21 @@
   }
 }
 
+// Put this P9Altivec related definition here since it's possible to be
+// selected to the VSX instruction xvnegsp, to avoid a possible undef.
+let Predicates = [HasP9Altivec] in {
+
+  def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
+            (v4i32 (VABSDUW $A, $B))>;
+
+  def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
+            (v8i16 (VABSDUH $A, $B))>;
+
+  def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
+            (v16i8 (VABSDUB $A, $B))>;
+
+  // As described for PPCISD::VABSD, the last operand indicates whether to do
+  // the sign bit flip.
+  def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
+            (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+}
Index: llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s
 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-PWR8 -implicit-check-not vabsdu
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-PWR7 -implicit-check-not vmaxsd
 
 define <4 x i32> @simple_absv_32(<4 x i32> %a) local_unnamed_addr {
 entry:
@@ -8,16 +9,21 @@
   %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %a, <4 x i32> %sub.i)
   ret <4 x i32> %0
 ; CHECK-LABEL: simple_absv_32
-; CHECK-DAG: vxor v{{[0-9]+}}, v[[REG:[0-9]+]], v[[REG]]
-; CHECK-DAG: xvnegsp v2, v2
-; CHECK-DAG: xvnegsp v3, v{{[0-9]+}}
-; CHECK-NEXT: vabsduw v2, v2, v{{[0-9]+}}
+; CHECK-NOT: vxor
+; CHECK-NOT: vabsduw
+; CHECK: vnegw v[[REG:[0-9]+]], v2
+; CHECK-NEXT: vmaxsw v2, v2, v[[REG]]
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: simple_absv_32
 ; CHECK-PWR8: xxlxor
 ; CHECK-PWR8: vsubuwm
 ; CHECK-PWR8: vmaxsw
 ; CHECK-PWR8: blr
+; CHECK-PWR7-LABEL: simple_absv_32
+; CHECK-PWR7: xxlxor
+; CHECK-PWR7: vsubuwm
+; CHECK-PWR7: vmaxsw
+; CHECK-PWR7: blr
 }
 
 define <4 x i32> @simple_absv_32_swap(<4 x i32> %a) local_unnamed_addr {
@@ -26,10 +32,10 @@
   %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub.i, <4 x i32> %a)
   ret <4 x i32> %0
 ; CHECK-LABEL: simple_absv_32_swap
-; CHECK-DAG: vxor v{{[0-9]+}}, v[[REG:[0-9]+]], v[[REG]]
-; CHECK-DAG: xvnegsp v2, v2
-; CHECK-DAG: xvnegsp v3, v{{[0-9]+}}
-; CHECK-NEXT: vabsduw v2, v2, v{{[0-9]+}}
+; CHECK-NOT: vxor
+; CHECK-NOT: vabsduw
+; CHECK: vnegw v[[REG:[0-9]+]], v2
+; CHECK-NEXT: vmaxsw v2, v2, v[[REG]]
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: simple_absv_32_swap
 ; CHECK-PWR8: xxlxor
@@ -44,15 +50,22 @@
   %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %a, <8 x i16> %sub.i)
   ret <8 x i16> %0
 ; CHECK-LABEL: simple_absv_16
-; CHECK: mtvsrws v{{[0-9]+}}, r{{[0-9]+}}
-; CHECK-NEXT: vadduhm v2, v2, v[[IMM:[0-9]+]]
-; CHECK-NEXT: vabsduh v2, v2, v[[IMM]]
+; CHECK-NOT: mtvsrws
+; CHECK-NOT: vabsduh
+; CHECK: xxlxor v[[ZERO:[0-9]+]], v[[ZERO]], v[[ZERO]]
+; CHECK-NEXT: vsubuhm v[[REG:[0-9]+]], v[[ZERO]], v2
+; CHECK-NEXT: vmaxsh v2, v2, v[[REG]]
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: simple_absv_16
 ; CHECK-PWR8: xxlxor
 ; CHECK-PWR8: vsubuhm
 ; CHECK-PWR8: vmaxsh
 ; CHECK-PWR8: blr
+; CHECK-PWR7-LABEL: simple_absv_16
+; CHECK-PWR7: xxlxor
+; CHECK-PWR7: vsubuhm
+; CHECK-PWR7: vmaxsh
+; CHECK-PWR7: blr
 }
 
 define <16 x i8> @simple_absv_8(<16 x i8> %a) local_unnamed_addr {
@@ -61,15 +74,45 @@
   %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %a, <16 x i8> %sub.i)
   ret <16 x i8> %0
 ; CHECK-LABEL: simple_absv_8
-; CHECK: xxspltib v{{[0-9]+}}, 128
-; CHECK-NEXT: vaddubm v2, v2, v[[IMM:[0-9]+]]
-; CHECK-NEXT: vabsdub v2, v2, v[[IMM]]
+; CHECK-NOT: xxspltib
+; CHECK-NOT: vabsdub
+; CHECK: xxlxor v[[ZERO:[0-9]+]], v[[ZERO]], v[[ZERO]]
+; CHECK-NEXT: vsububm v[[REG:[0-9]+]], v[[ZERO]], v2
+; CHECK-NEXT: vmaxsb v2, v2, v[[REG]]
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: simple_absv_8
 ; CHECK-PWR8: xxlxor
 ; CHECK-PWR8: vsububm
 ; CHECK-PWR8: vmaxsb
 ; CHECK-PWR8: blr
+; CHECK-PWR7-LABEL: simple_absv_8
+; CHECK-PWR7: xxlxor
+; CHECK-PWR7: vsububm
+; CHECK-PWR7: vmaxsb
+; CHECK-PWR7: blr
+}
+
+; v2i64 vmax isn't available on pwr7
+define <2 x i64> @sub_absv_64(<2 x i64> %a, <2 x i64> %b) local_unnamed_addr {
+entry:
+  %0 = sub nsw <2 x i64> %a, %b
+  %1 = icmp sgt <2 x i64> %0,
+  %2 = sub <2 x i64> zeroinitializer, %0
+  %3 = select <2 x i1> %1, <2 x i64> %0, <2 x i64> %2
+  ret <2 x i64> %3
+; CHECK-LABEL: sub_absv_64
+; CHECK: vsubudm
+; CHECK: vnegd
+; CHECK: vmaxsd
+; CHECK-NEXT: blr
+; CHECK-PWR8-LABEL: sub_absv_64
+; CHECK-PWR8-DAG: vsubudm
+; CHECK-PWR8-DAG: xxlxor
+; CHECK-PWR8: vmaxsd
+; CHECK-PWR8: blr
+; CHECK-PWR7-LABEL: sub_absv_64
+; CHECK-PWR7-NOT: vmaxsd
+; CHECK-PWR7: blr
 }
 
 ; The select pattern can only be detected for v4i32.
@@ -81,14 +124,77 @@
   %3 = select <4 x i1> %1, <4 x i32> %0, <4 x i32> %2
   ret <4 x i32> %3
 ; CHECK-LABEL: sub_absv_32
-; CHECK-DAG: xvnegsp v3, v3
-; CHECK-DAG: xvnegsp v2, v2
-; CHECK-NEXT: vabsduw v2, v2, v3
+; CHECK-NOT: vsubuwm
+; CHECK-NOT: vnegw
+; CHECK-NOT: vmaxsw
+; CHECK-DAG: xvnegsp v2, v2
+; CHECK-DAG: xvnegsp v3, v3
+; CHECK-NEXT: vabsduw v2, v{{[23]}}, v{{[23]}}
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: sub_absv_32
-; CHECK-PWR8: vsubuwm
-; CHECK-PWR8: xxlxor
+; CHECK-PWR8-DAG: vsubuwm
+; CHECK-PWR8-DAG: xxlxor
+; CHECK-PWR8: vmaxsw
 ; CHECK-PWR8: blr
+; CHECK-PWR7-LABEL: sub_absv_32
+; CHECK-PWR7-DAG: vsubuwm
+; CHECK-PWR7-DAG: xxlxor
+; CHECK-PWR7: vmaxsw
+; CHECK-PWR7: blr
+}
+
+define <8 x i16> @sub_absv_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
+entry:
+  %0 = sub nsw <8 x i16> %a, %b
+  %1 = icmp sgt <8 x i16> %0,
+  %2 = sub <8 x i16> zeroinitializer, %0
+  %3 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> %2
+  ret <8 x i16> %3
+; CHECK-LABEL: sub_absv_16
+; CHECK-NOT: vabsduh
+; CHECK-DAG: xxlxor v[[ZERO:[0-9]+]], v[[ZERO]], v[[ZERO]]
+; CHECK-DAG: vsubuhm v[[SUB:[0-9]+]], v2, v3
+; CHECK: vsubuhm v[[SUB1:[0-9]+]], v[[ZERO]], v[[SUB]]
+; CHECK-NEXT: vmaxsh v2, v[[SUB]], v[[SUB1]]
+; CHECK-NEXT: blr
+; CHECK-PWR8-LABEL: sub_absv_16
+; CHECK-PWR8-DAG: xxlxor v[[ZERO:[0-9]+]], v[[ZERO]], v[[ZERO]]
+; CHECK-PWR8-DAG: vsubuhm v[[SUB:[0-9]+]], v2, v3
+; CHECK-PWR8: vsubuhm v[[SUB1:[0-9]+]], v[[ZERO]], v[[SUB]]
+; CHECK-PWR8-NEXT: vmaxsh v2, v[[SUB]], v[[SUB1]]
+; CHECK-PWR8-NEXT: blr
+; CHECK-PWR7-LABEL: sub_absv_16
+; CHECK-PWR7-DAG: vsubuhm
+; CHECK-PWR7-DAG: xxlxor
+; CHECK-PWR7: vmaxsh
+; CHECK-PWR7-NEXT: blr
+}
+
+define <16 x i8> @sub_absv_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
+entry:
+  %0 = sub nsw <16 x i8> %a, %b
+  %1 = icmp sgt <16 x i8> %0,
+  %2 = sub <16 x i8> zeroinitializer, %0
+  %3 = select <16 x i1> %1, <16 x i8> %0, <16 x i8> %2
+  ret <16 x i8> %3
+; CHECK-LABEL: sub_absv_8
+; CHECK-NOT: vabsdub
+; CHECK-DAG: xxlxor v[[ZERO:[0-9]+]], v[[ZERO]], v[[ZERO]]
+; CHECK-DAG: vsububm v[[SUB:[0-9]+]], v2, v3
+; CHECK: vsububm v[[SUB1:[0-9]+]], v[[ZERO]], v[[SUB]]
+; CHECK-NEXT: vmaxsb v2, v[[SUB]], v[[SUB1]]
+; CHECK-NEXT: blr
+; CHECK-PWR8-LABEL: sub_absv_8
+; CHECK-PWR8-DAG: xxlxor v[[ZERO:[0-9]+]], v[[ZERO]], v[[ZERO]]
+; CHECK-PWR8-DAG: vsububm v[[SUB:[0-9]+]], v2, v3
+; CHECK-PWR8: vsububm v[[SUB1:[0-9]+]], v[[ZERO]], v[[SUB]]
+; CHECK-PWR8-NEXT: vmaxsb v2, v[[SUB]], v[[SUB1]]
+; CHECK-PWR8-NEXT: blr
+; CHECK-PWR7-LABEL: sub_absv_8
+; CHECK-PWR7-DAG: xxlxor
+; CHECK-PWR7-DAG: vsububm
+; CHECK-PWR7: vmaxsb
+; CHECK-PWR7-NEXT: blr
 }
 
 ; FIXME: This does not produce the ISD::ABS that we are looking for.
@@ -96,7 +202,7 @@
 ; We do manage to find the word version of ABS but not the halfword.
 ; Threfore, we end up doing more work than is required with a pair of abs for word
 ; instead of just one for the halfword.
-define <8 x i16> @sub_absv_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
+define <8 x i16> @sub_absv_16_ext(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
 entry:
   %0 = sext <8 x i16> %a to <8 x i32>
   %1 = sext <8 x i16> %b to <8 x i32>
@@ -106,23 +212,25 @@
   %5 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> %4
   %6 = trunc <8 x i32> %5 to <8 x i16>
   ret <8 x i16> %6
-; CHECK-LABEL: sub_absv_16
+; CHECK-LABEL: sub_absv_16_ext
 ; CHECK-NOT: vabsduh
 ; CHECK: vabsduw
+; CHECK-NOT: vnegw
 ; CHECK-NOT: vabsduh
 ; CHECK: vabsduw
+; CHECK-NOT: vnegw
 ; CHECK-NOT: vabsduh
 ; CHECK: blr
 ; CHECK-PWR8-LABEL: sub_absv_16
-; CHECK-PWR8: vsubuwm
-; CHECK-PWR8: xxlxor
+; CHECK-PWR8-DAG: vsubuwm
+; CHECK-PWR8-DAG: xxlxor
 ; CHECK-PWR8: blr
 }
 
 ; FIXME: This does not produce ISD::ABS. This does not even vectorize correctly!
 ; This function should look like sub_absv_32 and sub_absv_16 except that the type is v16i8.
 ; Function Attrs: norecurse nounwind readnone
-define <16 x i8> @sub_absv_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
+define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
 entry:
   %vecext = extractelement <16 x i8> %a, i32 0
   %conv = zext i8 %vecext to i32
@@ -285,14 +393,14 @@
   %conv122 = trunc i32 %15 to i8
   %vecins123 = insertelement <16 x i8> %vecins115, i8 %conv122, i32 15
   ret <16 x i8> %vecins123
-; CHECK-LABEL: sub_absv_8
+; CHECK-LABEL: sub_absv_8_ext
 ; CHECK-NOT: vabsdub
 ; CHECK: subf
 ; CHECK-NOT: vabsdub
 ; CHECK: xor
 ; CHECK-NOT: vabsdub
 ; CHECK: blr
-; CHECK-PWR8-LABEL: sub_absv_8
+; CHECK-PWR8-LABEL: sub_absv_8_ext
 ; CHECK-PWR8: subf
 ; CHECK-PWR8: xor
 ; CHECK-PWR8: blr
@@ -305,11 +413,16 @@
   %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub, <4 x i32> %sub.i)
   ret <4 x i32> %0
 ; CHECK-LABEL: sub_absv_vec_32
-; CHECK: vabsduw v2, v2, v3
+; CHECK-NOT: vsubuwm
+; CHECK-NOT: vnegw
+; CHECK-NOT: vmaxsw
+; CHECK-DAG: xvnegsp v2, v2
+; CHECK-DAG: xvnegsp v3, v3
+; CHECK-NEXT: vabsduw v2, v{{[23]}}, v{{[23]}}
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: sub_absv_vec_32
-; CHECK-PWR8: xxlxor
-; CHECK-PWR8: vsubuwm
+; CHECK-PWR8-DAG: xxlxor
+; CHECK-PWR8-DAG: vsubuwm
 ; CHECK-PWR8: vmaxsw
 ; CHECK-PWR8: blr
 }
@@ -321,11 +434,16 @@
   %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %sub, <8 x i16> %sub.i)
   ret <8 x i16> %0
 ; CHECK-LABEL: sub_absv_vec_16
-; CHECK: vabsduh v2, v2, v3
+; CHECK-NOT: mtvsrws
+; CHECK-NOT: vabsduh
+; CHECK-DAG: xxlxor v[[ZERO:[0-9]+]], v[[ZERO]], v[[ZERO]]
+; CHECK-DAG: vsubuhm v[[SUB:[0-9]+]], v2, v3
+; CHECK: vsubuhm v[[SUB1:[0-9]+]], v[[ZERO]], v[[SUB]]
+; CHECK-NEXT: vmaxsh v2, v[[SUB]], v[[SUB1]]
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: sub_absv_vec_16
-; CHECK-PWR8: xxlxor
-; CHECK-PWR8: vsubuhm
+; CHECK-PWR8-DAG: xxlxor
+; CHECK-PWR8-DAG: vsubuhm
 ; CHECK-PWR8: vmaxsh
 ; CHECK-PWR8: blr
 }
@@ -337,15 +455,67 @@
   %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %sub, <16 x i8> %sub.i)
   ret <16 x i8> %0
 ; CHECK-LABEL: sub_absv_vec_8
-; CHECK: vabsdub v2, v2, v3
+; CHECK-NOT: xxspltib
+; CHECK-NOT: vabsdub
+; CHECK-DAG: xxlxor v[[ZERO:[0-9]+]], v[[ZERO]], v[[ZERO]]
+; CHECK-DAG: vsububm v[[SUB:[0-9]+]], v2, v3
+; CHECK: vsububm v[[SUB1:[0-9]+]], v[[ZERO]], v[[SUB]]
+; CHECK-NEXT: vmaxsb v2, v[[SUB]], v[[SUB1]]
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: sub_absv_vec_8
-; CHECK-PWR8: xxlxor
-; CHECK-PWR8: vsububm
+; CHECK-PWR8-DAG: xxlxor
+; CHECK-PWR8-DAG: vsububm
 ; CHECK-PWR8: vmaxsb
 ; CHECK-PWR8: blr
 }
 
+define <4 x i32> @zext_sub_absd32(<4 x i16>, <4 x i16>) local_unnamed_addr {
+  %3 = zext <4 x i16> %0 to <4 x i32>
+  %4 = zext <4 x i16> %1 to <4 x i32>
+  %5 = sub <4 x i32> %3, %4
+  %6 = sub <4 x i32> zeroinitializer, %5
+  %7 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %5, <4 x i32> %6)
+  ret <4 x i32> %7
+; CHECK-LABEL: zext_sub_absd32
+; CHECK-NOT: xvnegsp
+; CHECK: vabsduw
+; CHECK: blr
+; CHECK-PWR8-LABEL: zext_sub_absd32
+; CHECK-PWR8: vmaxsw
+; CHECK-PWR8: blr
+}
+
+define <8 x i16> @zext_sub_absd16(<8 x i8>, <8 x i8>) local_unnamed_addr {
+  %3 = zext <8 x i8> %0 to <8 x i16>
+  %4 = zext <8 x i8> %1 to <8 x i16>
+  %5 = sub <8 x i16> %3, %4
+  %6 = sub <8 x i16> zeroinitializer, %5
+  %7 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %5, <8 x i16> %6)
+  ret <8 x i16> %7
+; CHECK-LABEL: zext_sub_absd16
+; CHECK-NOT: vadduhm
+; CHECK: vabsduh
+; CHECK: blr
+; CHECK-PWR8-LABEL: zext_sub_absd16
+; CHECK-PWR8: vmaxsh
+; CHECK-PWR8: blr
+}
+
+define <16 x i8> @zext_sub_absd8(<16 x i4>, <16 x i4>) local_unnamed_addr {
+  %3 = zext <16 x i4> %0 to <16 x i8>
+  %4 = zext <16 x i4> %1 to <16 x i8>
+  %5 = sub <16 x i8> %3, %4
+  %6 = sub <16 x i8> zeroinitializer, %5
+  %7 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %5, <16 x i8> %6)
+  ret <16 x i8> %7
+; CHECK-LABEL: zext_sub_absd8
+; CHECK-NOT: vaddubm
+; CHECK: vabsdub
+; CHECK: blr
+; CHECK-PWR8-LABEL: zext_sub_absd8
+; CHECK-PWR8: vmaxsb
+; CHECK-PWR8: blr
+}
 
 declare <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32>, <4 x i32>)
Index: llvm/trunk/test/CodeGen/PowerPC/pre-inc-disable.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ llvm/trunk/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -181,12 +181,8 @@
 ; CHECK: vperm v4, v0, v4, v3
 ; CHECK: vperm v2, v5, v0, v2
 ; CHECK: vperm v3, v0, v5, v3
-; CHECK: xvnegsp v5, v1
-; CHECK: xvnegsp v4, v4
-; CHECK: xvnegsp v2, v2
-; CHECK: xvnegsp v3, v3
 ; CHECK: vabsduw v3, v4, v3
-; CHECK: vabsduw v2, v5, v2
+; CHECK: vabsduw v2, v1, v2
 ; CHECK: vadduwm v2, v2, v3
 ; CHECK: xxswapd v3, v2
 ; CHECK: vadduwm v2, v2, v3
@@ -212,12 +208,8 @@
 ; P9BE: vperm v4, v5, v4, v3
 ; P9BE: vperm v2, v5, v0, v2
 ; P9BE: vperm v3, v5, v0, v3
-; P9BE: xvnegsp v5, v1
-; P9BE: xvnegsp v4, v4
-; P9BE: xvnegsp v2, v2
-; P9BE: xvnegsp v3, v3
 ; P9BE: vabsduw v3, v4, v3
-; P9BE: vabsduw v2, v5, v2
+; P9BE: vabsduw v2, v1, v2
 ; P9BE: vadduwm v2, v2, v3
 ; P9BE: xxswapd v3, v2
 ; P9BE: vadduwm v2, v2, v3
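
For reference, below is a minimal scalar C++ sketch (not part of the patch; the helper names are illustrative) of the sign-bit-flip identity that the (i32 1) form of PPCISD::VABSD relies on. Per 32-bit lane, VABSDUW computes an unsigned absolute difference, and XVNEGSP flips the most significant bit, which is the same as adding 0x80000000 modulo 2^32. Assuming the signed subtraction does not overflow (the IR patterns in the tests above carry the nsw flag), flipping both inputs makes the unsigned difference equal to the signed absolute difference.

// Scalar model of one 32-bit lane of VABSDUW(XVNEGSP a, XVNEGSP b).
// Illustrative sketch only; not code from the patch.
#include <cassert>
#include <cstdint>
#include <cstdio>

// XVNEGSP on an integer lane: flip the sign bit (== add 0x80000000 mod 2^32).
static uint32_t flipSignBit(uint32_t X) { return X ^ 0x80000000u; }

// VABSDUW on one lane: unsigned absolute difference.
static uint32_t absdu(uint32_t A, uint32_t B) { return A > B ? A - B : B - A; }

// abs(a - b) on signed lanes, assuming the subtraction does not wrap
// (the IR in the tests uses 'sub nsw').
static uint32_t signedAbsDiff(int32_t A, int32_t B) {
  int64_t D = static_cast<int64_t>(A) - static_cast<int64_t>(B);
  return static_cast<uint32_t>(D < 0 ? -D : D);
}

int main() {
  const int32_t Tests[][2] = {{-1, 0}, {7, -9}, {-100, 100}, {123456, -654321}};
  for (const auto &T : Tests) {
    uint32_t Expected = signedAbsDiff(T[0], T[1]);
    uint32_t Got = absdu(flipSignBit(static_cast<uint32_t>(T[0])),
                         flipSignBit(static_cast<uint32_t>(T[1])));
    assert(Expected == Got); // both forms agree lane-wise
    std::printf("abs(%d - %d) = %u\n", static_cast<int>(T[0]),
                static_cast<int>(T[1]), static_cast<unsigned>(Got));
  }
  return 0;
}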