Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -330,6 +330,9 @@
   // to X.
   bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const;
 
+  // Try to expand a boolean SELECT_CCMASK using an IPM sequence.
+  SDValue expandSelectBoolean(SDNode *Node);
+
 public:
   SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
       : SelectionDAGISel(TM, OptLevel) {}
@@ -348,6 +351,7 @@
   void Select(SDNode *Node) override;
   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
+  void PreprocessISelDAG() override;
 
   // Include the pieces autogenerated from the target description.
   #include "SystemZGenDAGISel.inc"
@@ -1438,3 +1442,183 @@
 
   return true;
 }
+
+namespace {
+// Represents a sequence for extracting a 0/1 value from an IPM result:
+// (((X ^ XORValue) + AddValue) >> Bit)
+struct IPMConversion {
+  IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
+    : XORValue(xorValue), AddValue(addValue), Bit(bit) {}
+
+  int64_t XORValue; // XORed with the IPM result X first.
+  int64_t AddValue; // Added to the XORed value.
+  unsigned Bit;     // Right-shift amount applied last.
+};
+} // end anonymous namespace
+
+// Return a sequence for getting a 1 from an IPM result when CC has a
+// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
+// The handling of CC values outside CCValid doesn't matter.
+static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
+  // Deal with cases where the result can be taken directly from a bit
+  // of the IPM result.
+  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
+    return IPMConversion(0, 0, SystemZ::IPM_CC);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
+    return IPMConversion(0, 0, SystemZ::IPM_CC + 1);
+
+  // Deal with cases where we can add a value to force the sign bit
+  // to contain the right value.  Putting the bit in 31 means we can
+  // use SRL rather than RISBG(L), and also makes it easier to get a
+  // 0/-1 value, so it has priority over the other tests below.
+  //
+  // These sequences rely on the fact that the upper two bits of the
+  // IPM result are zero.
+  uint64_t TopBit = uint64_t(1) << 31;
+  if (CCMask == (CCValid & SystemZ::CCMASK_0))
+    return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
+    return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_0
+                            | SystemZ::CCMASK_1
+                            | SystemZ::CCMASK_2)))
+    return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
+  if (CCMask == (CCValid & SystemZ::CCMASK_3))
+    return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_1
+                            | SystemZ::CCMASK_2
+                            | SystemZ::CCMASK_3)))
+    return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);
+
+  // Next try inverting the value and testing a bit.  0/1 could be
+  // handled this way too, but we dealt with that case above.
+  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
+    return IPMConversion(-1, 0, SystemZ::IPM_CC);
+
+  // Handle cases where adding a value forces a non-sign bit to contain
+  // the right value.
+  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
+    return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
+    return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);
+
+  // The remaining cases are 1, 2, 0/1/3 and 0/2/3.  All of these can
+  // be done by inverting the low CC bit and applying one of the
+  // sign-based extractions above.
+  if (CCMask == (CCValid & SystemZ::CCMASK_1))
+    return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
+  if (CCMask == (CCValid & SystemZ::CCMASK_2))
+    return IPMConversion(1 << SystemZ::IPM_CC,
+                         TopBit - (3 << SystemZ::IPM_CC), 31);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_0
+                            | SystemZ::CCMASK_1
+                            | SystemZ::CCMASK_3)))
+    return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
+  if (CCMask == (CCValid & (SystemZ::CCMASK_0
+                            | SystemZ::CCMASK_2
+                            | SystemZ::CCMASK_3)))
+    return IPMConversion(1 << SystemZ::IPM_CC,
+                         TopBit - (1 << SystemZ::IPM_CC), 31);
+
+  llvm_unreachable("Unexpected CC combination");
+}
+
+SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) {
+  // Only selects between constants qualify: the false value must be 0
+  // and the true value either 1 or -1.
+  auto *TrueC = dyn_cast<ConstantSDNode>(Node->getOperand(0));
+  auto *FalseC = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+  if (!TrueC || !FalseC || FalseC->getZExtValue() != 0)
+    return SDValue();
+  const int64_t TrueVal = TrueC->getSExtValue();
+  if (TrueVal != 1 && TrueVal != -1)
+    return SDValue();
+
+  // The CC masks must be constants so a conversion can be chosen.
+  auto *ValidC = dyn_cast<ConstantSDNode>(Node->getOperand(2));
+  auto *MaskC = dyn_cast<ConstantSDNode>(Node->getOperand(3));
+  if (!ValidC || !MaskC)
+    return SDValue();
+  int CCValid = ValidC->getZExtValue();
+  int CCMask = MaskC->getZExtValue();
+
+  // Build an IPM node on the select's glue operand, then apply
+  // ((X ^ XORValue) + AddValue) as prescribed by the conversion.
+  SDLoc DL(Node);
+  EVT VT = Node->getValueType(0);
+  IPMConversion Conv = getIPMConversion(CCValid, CCMask);
+  SDValue Val = CurDAG->getNode(SystemZISD::IPM, DL, MVT::i32,
+                                Node->getOperand(4));
+  if (Conv.XORValue)
+    Val = CurDAG->getNode(ISD::XOR, DL, MVT::i32, Val,
+                          CurDAG->getConstant(Conv.XORValue, DL, MVT::i32));
+  if (Conv.AddValue)
+    Val = CurDAG->getNode(ISD::ADD, DL, MVT::i32, Val,
+                          CurDAG->getConstant(Conv.AddValue, DL, MVT::i32));
+
+  // A 32-bit result whose answer sits in bit 31 needs a single shift:
+  // logical for 0/1, arithmetic for 0/-1.
+  if (VT == MVT::i32 && Conv.Bit == 31) {
+    SDValue ShAmt = CurDAG->getConstant(Conv.Bit, DL, MVT::i32);
+    return CurDAG->getNode(TrueVal == 1 ? ISD::SRL : ISD::SRA, DL, MVT::i32,
+                           Val, ShAmt);
+  }
+
+  // Otherwise extend to the result type if needed and isolate the bit.
+  if (VT != MVT::i32)
+    Val = CurDAG->getNode(ISD::ANY_EXTEND, DL, VT, Val);
+  if (TrueVal == 1) {
+    // The SHR/AND sequence should get optimized to an RISBG.
+    SDValue ShAmt = CurDAG->getConstant(Conv.Bit, DL, MVT::i32);
+    Val = CurDAG->getNode(ISD::SRL, DL, VT, Val, ShAmt);
+    return CurDAG->getNode(ISD::AND, DL, VT, Val,
+                           CurDAG->getConstant(1, DL, VT));
+  }
+
+  // 0/-1: shift the bit up to the top, then arithmetic-shift it back down.
+  int Size = VT.getSizeInBits();
+  SDValue UpAmt = CurDAG->getConstant(Size - 1 - Conv.Bit, DL, MVT::i32);
+  Val = CurDAG->getNode(ISD::SHL, DL, VT, Val, UpAmt);
+  SDValue DownAmt = CurDAG->getConstant(Size - 1, DL, MVT::i32);
+  return CurDAG->getNode(ISD::SRA, DL, VT, Val, DownAmt);
+}
+
+void SystemZDAGToDAGISel::PreprocessISelDAG() {
+  // Subtargets with conditional immediate loads (LoadStoreOnCond2) always
+  // prefer those over an IPM sequence, so there is nothing to do for them.
+  if (Subtarget->hasLoadStoreOnCond2())
+    return;
+
+  bool Changed = false;
+
+  SelectionDAG::allnodes_iterator It = CurDAG->allnodes_begin();
+  const SelectionDAG::allnodes_iterator End = CurDAG->allnodes_end();
+  while (It != End) {
+    // Step past the current node before anything below rewrites its uses.
+    SDNode *Cur = &*It++;
+    if (Cur->use_empty())
+      continue;
+
+    // Only SELECT_CCMASK nodes are candidates; expandSelectBoolean returns
+    // a null SDValue when the node does not qualify.
+    SDValue Replacement;
+    if (Cur->getOpcode() == SystemZISD::SELECT_CCMASK)
+      Replacement = expandSelectBoolean(Cur);
+    if (!Replacement)
+      continue;
+
+    DEBUG(dbgs() << "SystemZ DAG preprocessing replacing:\nOld:    ");
+    DEBUG(Cur->dump(CurDAG));
+    DEBUG(dbgs() << "\nNew: ");
+    DEBUG(Replacement.getNode()->dump(CurDAG));
+    DEBUG(dbgs() << "\n");
+
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Cur, 0), Replacement);
+    Changed = true;
+  }
+
+  // Clean up only if some node was actually replaced.
+  if (Changed)
+    CurDAG->RemoveDeadNodes();
+}
+
Index: lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.h
+++ lib/Target/SystemZ/SystemZISelLowering.h
@@ -490,6 +490,14 @@
                       SelectionDAG &DAG) const override;
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+  /// Determine which bits of the target node Op are known to be either
+  /// zero or one and return them in Known.
+  void computeKnownBitsForTargetNode(const SDValue Op,
+                                     KnownBits &Known,
+                                     const APInt &DemandedElts,
+                                     const SelectionDAG &DAG,
+                                     unsigned Depth = 0) const override;
+
   ISD::NodeType getExtendForAtomicOps() const override {
     return ISD::ANY_EXTEND;
   }
@@ -563,7 +571,9 @@
                          bool Force) const;
   SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op,
                                  DAGCombinerInfo &DCI) const;
+  SDValue combineZERO_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -571,6 +581,8 @@
   SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
 
   // If the last instruction before MBBI in MBB was some form of COMPARE,
   // try to replace it with a COMPARE AND BRANCH just before MBBI.
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -31,17 +31,6 @@
 #define DEBUG_TYPE "systemz-lower"
 
 namespace {
-// Represents a sequence for extracting a 0/1 value from an IPM result:
-// (((X ^ XORValue) + AddValue) >> Bit)
-struct IPMConversion {
-  IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
-    : XORValue(xorValue), AddValue(addValue), Bit(bit) {}
-
-  int64_t XORValue;
-  int64_t AddValue;
-  unsigned Bit;
-};
-
 // Represents information about a comparison.
 struct Comparison {
   Comparison(SDValue Op0In, SDValue Op1In)
@@ -517,7 +506,9 @@
   setOperationAction(ISD::VAEND,   MVT::Other, Expand);
 
   // Codes for which we want to perform some z-specific combinations.
+  setTargetDAGCombine(ISD::ZERO_EXTEND);
   setTargetDAGCombine(ISD::SIGN_EXTEND);
+  setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
   setTargetDAGCombine(ISD::STORE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
@@ -1699,73 +1690,6 @@
 #undef CONV
 }
 
-// Return a sequence for getting a 1 from an IPM result when CC has a
-// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
-// The handling of CC values outside CCValid doesn't matter.
-static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
-  // Deal with cases where the result can be taken directly from a bit
-  // of the IPM result.
-  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
-    return IPMConversion(0, 0, SystemZ::IPM_CC);
-  if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
-    return IPMConversion(0, 0, SystemZ::IPM_CC + 1);
-
-  // Deal with cases where we can add a value to force the sign bit
-  // to contain the right value.  Putting the bit in 31 means we can
-  // use SRL rather than RISBG(L), and also makes it easier to get a
-  // 0/-1 value, so it has priority over the other tests below.
-  //
-  // These sequences rely on the fact that the upper two bits of the
-  // IPM result are zero.
-  uint64_t TopBit = uint64_t(1) << 31;
-  if (CCMask == (CCValid & SystemZ::CCMASK_0))
-    return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
-  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
-    return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
-  if (CCMask == (CCValid & (SystemZ::CCMASK_0
-                            | SystemZ::CCMASK_1
-                            | SystemZ::CCMASK_2)))
-    return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
-  if (CCMask == (CCValid & SystemZ::CCMASK_3))
-    return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
-  if (CCMask == (CCValid & (SystemZ::CCMASK_1
-                            | SystemZ::CCMASK_2
-                            | SystemZ::CCMASK_3)))
-    return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);
-
-  // Next try inverting the value and testing a bit.  0/1 could be
-  // handled this way too, but we dealt with that case above.
-  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
-    return IPMConversion(-1, 0, SystemZ::IPM_CC);
-
-  // Handle cases where adding a value forces a non-sign bit to contain
-  // the right value.
-  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
-    return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
-  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
-    return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);
-
-  // The remaining cases are 1, 2, 0/1/3 and 0/2/3.  All these are
-  // can be done by inverting the low CC bit and applying one of the
-  // sign-based extractions above.
-  if (CCMask == (CCValid & SystemZ::CCMASK_1))
-    return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
-  if (CCMask == (CCValid & SystemZ::CCMASK_2))
-    return IPMConversion(1 << SystemZ::IPM_CC,
-                         TopBit - (3 << SystemZ::IPM_CC), 31);
-  if (CCMask == (CCValid & (SystemZ::CCMASK_0
-                            | SystemZ::CCMASK_1
-                            | SystemZ::CCMASK_3)))
-    return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
-  if (CCMask == (CCValid & (SystemZ::CCMASK_0
-                            | SystemZ::CCMASK_2
-                            | SystemZ::CCMASK_3)))
-    return IPMConversion(1 << SystemZ::IPM_CC,
-                         TopBit - (1 << SystemZ::IPM_CC), 31);
-
-  llvm_unreachable("Unexpected CC combination");
-}
-
 // If C can be converted to a comparison against zero, adjust the operands
 // as necessary.
 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
@@ -2237,6 +2161,24 @@
   C.CCMask = NewCCMask;
 }
 
+// If C.Op0 is (and X, Mask) with a constant Mask that covers every bit of X
+// not known to be zero, the AND is redundant: compare X directly instead.
+// Such ANDs are sometimes left behind by the generic BRCOND expansion.
+static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
+                                  Comparison &C) {
+  if (C.Op0.getOpcode() != ISD::AND)
+    return;
+  SDValue Inner = C.Op0.getOperand(0);
+  auto *MaskC = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
+  if (!MaskC)
+    return;
+  KnownBits InnerKnown;
+  DAG.computeKnownBits(Inner, InnerKnown);
+  uint64_t MaybeSet = (~InnerKnown.Zero).getZExtValue();
+  if ((MaybeSet & ~MaskC->getZExtValue()) == 0)
+    C.Op0 = Inner;
+}
+
 // Return a Comparison that tests the condition-code result of intrinsic
 // node Call against constant integer CC using comparison code Cond.
 // Opcode is the opcode of the SystemZISD operation for the intrinsic
@@ -2311,6 +2253,7 @@
     else
       C.ICmpType = SystemZICMP::SignedOnly;
     C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
+    adjustForRedundantAnd(DAG, DL, C);
     adjustZeroCmp(DAG, DL, C);
     adjustSubwordCmp(DAG, DL, C);
     adjustForSubtraction(DAG, DL, C);
@@ -2388,24 +2331,11 @@
 // in CCValid, so other values can be ignored.
 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue,
                          unsigned CCValid, unsigned CCMask) {
-  IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
-  SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
-
-  if (Conversion.XORValue)
-    Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
-                         DAG.getConstant(Conversion.XORValue, DL, MVT::i32));
-
-  if (Conversion.AddValue)
-    Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
-                         DAG.getConstant(Conversion.AddValue, DL, MVT::i32));
-
-  // The SHR/AND sequence should get optimized to an RISBG.
-  Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
-                       DAG.getConstant(Conversion.Bit, DL, MVT::i32));
-  if (Conversion.Bit != 31)
-    Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
-                         DAG.getConstant(1, DL, MVT::i32));
-  return Result;
+  SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
+                    DAG.getConstant(0, DL, MVT::i32),
+                    DAG.getConstant(CCValid, DL, MVT::i32),
+                    DAG.getConstant(CCMask, DL, MVT::i32), Glue };
+  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
 }
 
 // Return the SystemISD vector comparison operation for CC, or 0 if it cannot
@@ -2620,35 +2550,10 @@
   }
 
   SDValue Glue = emitCmp(DAG, DL, C);
-
-  // Special case for handling -1/0 results.  The shifts we use here
-  // should get optimized with the IPM conversion sequence.
-  auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp);
-  auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp);
-  if (TrueC && FalseC) {
-    int64_t TrueVal = TrueC->getSExtValue();
-    int64_t FalseVal = FalseC->getSExtValue();
-    if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) {
-      // Invert the condition if we want -1 on false.
-      if (TrueVal == 0)
-        C.CCMask ^= C.CCValid;
-      SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
-      EVT VT = Op.getValueType();
-      // Extend the result to VT.  Upper bits are ignored.
-      if (!is32Bit(VT))
-        Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
-      // Sign-extend from the low bit.
-      SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32);
-      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
-      return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
-    }
-  }
-
   SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
                    DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};
 
-  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
-  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
+  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
 }
 
 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
@@ -5174,6 +5079,54 @@
   return SDValue();
 }
 
+SDValue SystemZTargetLowering::combineZERO_EXTEND(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  // Fold (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2'), where
+  // C1' and C2' are the zero-extended constants in the wider type.
+  SDValue Src = N->getOperand(0);
+  if (Src.getOpcode() != SystemZISD::SELECT_CCMASK)
+    return SDValue();
+  auto *TrueC = dyn_cast<ConstantSDNode>(Src.getOperand(0));
+  auto *FalseC = dyn_cast<ConstantSDNode>(Src.getOperand(1));
+  if (!TrueC || !FalseC)
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  EVT WideVT = N->getValueType(0);
+  SDLoc DL(Src);
+  SDValue Ops[] = { DAG.getConstant(TrueC->getZExtValue(), DL, WideVT),
+                    DAG.getConstant(FalseC->getZExtValue(), DL, WideVT),
+                    Src.getOperand(2), Src.getOperand(3), Src.getOperand(4) };
+  SDValue Wide = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, WideVT, Ops);
+  // The narrow select has other users: give them a truncate of the wide one.
+  if (!Src.hasOneUse()) {
+    SDValue Narrow = DAG.getNode(ISD::TRUNCATE, DL, Src.getValueType(), Wide);
+    DCI.CombineTo(Src.getNode(), Narrow);
+  }
+  return Wide;
+}
+
+SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  // Fold (sext_in_reg (setcc LHS, RHS, COND), i1)
+  // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
+  // into (select_cc LHS, RHS, -1, 0, COND).
+  if (cast<VTSDNode>(N->getOperand(1))->getVT() != MVT::i1)
+    return SDValue();
+  SDValue Cmp = N->getOperand(0);
+  if (Cmp.hasOneUse() && Cmp.getOpcode() == ISD::ANY_EXTEND)
+    Cmp = Cmp.getOperand(0);
+  if (!Cmp.hasOneUse() || Cmp.getOpcode() != ISD::SETCC)
+    return SDValue();
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+  SDLoc DL(Cmp);
+  SDValue Ops[] = { Cmp.getOperand(0), Cmp.getOperand(1),
+                    DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
+                    Cmp.getOperand(2) };
+  return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
+}
+
 SDValue SystemZTargetLowering::combineSIGN_EXTEND(
     SDNode *N, DAGCombinerInfo &DCI) const {
   // Convert (sext (ashr (shl X, C1), C2)) to
@@ -5475,11 +5428,116 @@
   return SDValue();
 }
 
+static bool combineCCMask(SDValue &Glue, int &CCValid, int &CCMask) {
+  // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
+  // set by the glued instruction using the CCValid / CCMask masks.
+  // If the glued instruction is itself a (ICMP (SELECT_CCMASK)) testing
+  // the condition code set by some other instruction, see whether we
+  // can directly use that condition code.
+  bool Invert = false;
+
+  // Verify that we have an appropriate mask for an EQ or NE comparison.
+  if (CCValid != SystemZ::CCMASK_ICMP)
+    return false;
+  if (CCMask == SystemZ::CCMASK_CMP_NE)
+    Invert = !Invert;
+  else if (CCMask != SystemZ::CCMASK_CMP_EQ)
+    return false;
+
+  // Verify that we have an ICMP that is the single user of a SELECT_CCMASK.
+  SDNode *ICmp = Glue.getNode();
+  if (ICmp->getOpcode() != SystemZISD::ICMP)
+    return false;
+  SDNode *Select = ICmp->getOperand(0).getNode();
+  if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
+    return false;
+  if (!Select->hasOneUse())
+    return false;
+
+  // Verify that the ICMP compares against one of the select values.
+  auto *CompareVal = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
+  auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
+  auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
+  if (!CompareVal || !TrueVal || !FalseVal)
+    return false;
+  // If both select arms are the same constant, the comparison result does
+  // not depend on the condition code at all, so there is nothing to forward.
+  if (TrueVal->getZExtValue() == FalseVal->getZExtValue())
+    return false;
+  if (CompareVal->getZExtValue() == FalseVal->getZExtValue())
+    Invert = !Invert;
+  else if (CompareVal->getZExtValue() != TrueVal->getZExtValue())
+    return false;
+
+  // Compute the effective CC mask for the new branch or select.
+  auto *NewCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
+  auto *NewCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
+  if (!NewCCValid || !NewCCMask)
+    return false;
+  CCValid = NewCCValid->getZExtValue();
+  CCMask = NewCCMask->getZExtValue();
+  if (Invert)
+    CCMask ^= CCValid;
+
+  // Return the updated Glue link; nothing is modified on failure.
+  Glue = Select->getOperand(4);
+  return true;
+}
+
+SDValue SystemZTargetLowering::combineBR_CCMASK(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  // Fold BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK that
+  // tests the condition code consumed by the inner select.
+  auto *ValidC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  auto *MaskC = dyn_cast<ConstantSDNode>(N->getOperand(2));
+  if (!ValidC || !MaskC)
+    return SDValue();
+
+  int NewValid = ValidC->getZExtValue();
+  int NewMask = MaskC->getZExtValue();
+  SDValue Glue = N->getOperand(4);
+  if (!combineCCMask(Glue, NewValid, NewMask))
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+  return DAG.getNode(SystemZISD::BR_CCMASK, DL, N->getValueType(0),
+                     N->getOperand(0), DAG.getConstant(NewValid, DL, MVT::i32),
+                     DAG.getConstant(NewMask, DL, MVT::i32), N->getOperand(3),
+                     Glue);
+}
+
+SDValue SystemZTargetLowering::combineSELECT_CCMASK(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  // Fold SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK
+  // that tests the condition code consumed by the inner select.
+  auto *ValidC = dyn_cast<ConstantSDNode>(N->getOperand(2));
+  auto *MaskC = dyn_cast<ConstantSDNode>(N->getOperand(3));
+  if (!ValidC || !MaskC)
+    return SDValue();
+
+  int NewValid = ValidC->getZExtValue();
+  int NewMask = MaskC->getZExtValue();
+  SDValue Glue = N->getOperand(4);
+  if (!combineCCMask(Glue, NewValid, NewMask))
+    return SDValue();
+
+  // Operands 0 and 1 are kept; only the CC masks and the glue are replaced.
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, N->getValueType(0),
+                     N->getOperand(0), N->getOperand(1),
+                     DAG.getConstant(NewValid, DL, MVT::i32),
+                     DAG.getConstant(NewMask, DL, MVT::i32), Glue);
+}
+
 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   switch(N->getOpcode()) {
   default: break;
+  case ISD::ZERO_EXTEND:        return combineZERO_EXTEND(N, DCI);
   case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
+  case ISD::SIGN_EXTEND_INREG:  return combineSIGN_EXTEND_INREG(N, DCI);
   case SystemZISD::MERGE_HIGH:
   case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
   case ISD::STORE:              return combineSTORE(N, DCI);
@@ -5491,11 +5549,37 @@
   case ISD::SRA:
   case ISD::SRL:
   case ISD::ROTL:               return combineSHIFTROT(N, DCI);
+  case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
+  case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
   }
 
   return SDValue();
 }
 
+void
+SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+                                                     KnownBits &Known,
+                                                     const APInt &DemandedElts,
+                                                     const SelectionDAG &DAG,
+                                                     unsigned Depth) const {
+  const unsigned Width = Known.getBitWidth();
+
+  // Start from "nothing known"; that is also the result for every opcode
+  // other than SELECT_CCMASK.
+  Known.resetAll();
+  if (Op.getOpcode() != SystemZISD::SELECT_CCMASK)
+    return;
+
+  // Operands 0 and 1 are the two values being selected between, so a bit
+  // is reported as known only when both operands agree on it.
+  // DemandedElts is not consulted.
+  KnownBits IfTrue(Width), IfFalse(Width);
+  DAG.computeKnownBits(Op.getOperand(0), IfTrue, Depth + 1);
+  DAG.computeKnownBits(Op.getOperand(1), IfFalse, Depth + 1);
+  Known.Zero = IfTrue.Zero & IfFalse.Zero;
+  Known.One = IfTrue.One & IfFalse.One;
+}
+
 //===----------------------------------------------------------------------===//
 // Custom insertion
 //===----------------------------------------------------------------------===//
Index: lib/Target/SystemZ/SystemZInstrInfo.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.td
+++ lib/Target/SystemZ/SystemZInstrInfo.td
@@ -2131,18 +2131,6 @@
 def  : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
            (SLGF GR64:$src1, bdxaddr20only:$addr)>;
 
-// Optimize sign-extended 1/0 selects to -1/0 selects.  This is important
-// for vector legalization.
-def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, imm32zx4:$cc)),
-                         (i32 31)),
-                    (i32 31)),
-          (Select32 (LHI -1), (LHI 0), imm32zx4:$valid, imm32zx4:$cc)>;
-def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid,
-                                                       imm32zx4:$cc)))),
-                    (i32 63)),
-               (i32 63)),
-          (Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>;
-
 // Avoid generating 2 XOR instructions. (xor (and x, y), y) is
 // equivalent to (and (xor x, -1), y)
 def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
Index: test/CodeGen/SystemZ/branch-07.ll
===================================================================
--- test/CodeGen/SystemZ/branch-07.ll
+++ test/CodeGen/SystemZ/branch-07.ll
@@ -129,8 +129,8 @@
 ; CHECK-LABEL: f9:
 ; CHECK: ipm [[REG:%r[0-5]]]
 ; CHECK: afi [[REG]], -268435456
-; CHECK: risbg [[REG2:%r[0-5]]], [[REG]], 63, 191, 33
-; CHECK: lcgr  {{%r[0-5]}}, [[REG2]]
+; CHECK: sllg [[REG2:%r[0-5]]], [[REG]], 32
+; CHECK: srag {{%r[0-5]}}, [[REG2]], 63
 ; CHECK: br %r14
   %avec = bitcast i64 %a to <2 x i32>
   %bvec = bitcast i64 %b to <2 x i32>
@@ -145,8 +145,8 @@
 ; CHECK-LABEL: f10:
 ; CHECK: ipm [[REG:%r[0-5]]]
 ; CHECK: afi [[REG]], 1879048192
-; CHECK: risbg [[REG2:%r[0-5]]], [[REG]], 63, 191, 33
-; CHECK: lcgr  {{%r[0-5]}}, [[REG2]]
+; CHECK: sllg [[REG2:%r[0-5]]], [[REG]], 32
+; CHECK: srag {{%r[0-5]}}, [[REG2]], 63
 ; CHECK: br %r14
   %avec = bitcast i64 %a to <2 x i32>
   %bvec = bitcast i64 %b to <2 x i32>
Index: test/CodeGen/SystemZ/cmpxchg-01.ll
===================================================================
--- test/CodeGen/SystemZ/cmpxchg-01.ll
+++ test/CodeGen/SystemZ/cmpxchg-01.ll
@@ -89,3 +89,83 @@
   ret i32 %res
 }
 
+
+declare void @g()
+
+; Check using the comparison result for a branch.
+; CHECK-LABEL: f4
+; CHECK-MAIN-LABEL: f4:
+; CHECK-MAIN: risbg [[RISBG:%r[1-9]+]], %r2, 0, 189, 0{{$}}
+; CHECK-MAIN-DAG: sll %r2, 3
+; CHECK-MAIN-DAG: l [[OLD:%r[0-9]+]], 0([[RISBG]])
+; CHECK-MAIN: [[LOOP:\.[^ ]*]]:
+; CHECK-MAIN: rll [[TMP:%r[0-9]+]], [[OLD]], 8(%r2)
+; CHECK-MAIN: risbg %r3, [[TMP]], 32, 55, 0
+; CHECK-MAIN: cr [[TMP]], %r3
+; CHECK-MAIN: jlh [[EXIT:\.[^ ]*]]
+; CHECK-MAIN: risbg %r4, [[TMP]], 32, 55, 0
+; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r4, -8({{%r[1-9]+}})
+; CHECK-MAIN: cs [[OLD]], [[NEW]], 0([[RISBG]])
+; CHECK-MAIN: jl [[LOOP]]
+; CHECK-MAIN: [[EXIT]]:
+; CHECK-MAIN-NEXT: jlh [[LABEL:\.[^ ]*]]
+; CHECK-MAIN: jg g
+; CHECK-MAIN: [[LABEL]]:
+; CHECK-MAIN: br %r14
+;
+; CHECK-SHIFT-LABEL: f4:
+; CHECK-SHIFT: sll %r2, 3
+; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], %r2
+; CHECK-SHIFT: rll
+; CHECK-SHIFT: rll {{%r[0-9]+}}, %r4, -8([[NEGSHIFT]])
+define void @f4(i8 *%src, i8 %cmp, i8 %swap) {
+  %pair = cmpxchg i8 *%src, i8 %cmp, i8 %swap seq_cst seq_cst
+  %cond = extractvalue { i8, i1 } %pair, 1
+  br i1 %cond, label %call, label %exit
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ... and the same with the inverted direction.
+; CHECK-MAIN-LABEL: f5:
+; CHECK-MAIN: risbg [[RISBG:%r[1-9]+]], %r2, 0, 189, 0{{$}}
+; CHECK-MAIN-DAG: sll %r2, 3
+; CHECK-MAIN-DAG: l [[OLD:%r[0-9]+]], 0([[RISBG]])
+; CHECK-MAIN: [[LOOP:\.[^ ]*]]:
+; CHECK-MAIN: rll [[TMP:%r[0-9]+]], [[OLD]], 8(%r2)
+; CHECK-MAIN: risbg %r3, [[TMP]], 32, 55, 0
+; CHECK-MAIN: cr [[TMP]], %r3
+; CHECK-MAIN: jlh [[EXIT:\.[^ ]*]]
+; CHECK-MAIN: risbg %r4, [[TMP]], 32, 55, 0
+; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r4, -8({{%r[1-9]+}})
+; CHECK-MAIN: cs [[OLD]], [[NEW]], 0([[RISBG]])
+; CHECK-MAIN: jl [[LOOP]]
+; CHECK-MAIN: [[EXIT]]:
+; CHECK-MAIN-NEXT: jlh [[LABEL:\.[^ ]*]]
+; CHECK-MAIN: br %r14
+; CHECK-MAIN: [[LABEL]]:
+; CHECK-MAIN: jg g
+;
+; CHECK-SHIFT-LABEL: f5:
+; CHECK-SHIFT: sll %r2, 3
+; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], %r2
+; CHECK-SHIFT: rll
+; CHECK-SHIFT: rll {{%r[0-9]+}}, %r4, -8([[NEGSHIFT]])
+define void @f5(i8 *%src, i8 %cmp, i8 %swap) {
+  %pair = cmpxchg i8 *%src, i8 %cmp, i8 %swap seq_cst seq_cst
+  %cond = extractvalue { i8, i1 } %pair, 1
+  br i1 %cond, label %exit, label %call
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
Index: test/CodeGen/SystemZ/cmpxchg-02.ll
===================================================================
--- test/CodeGen/SystemZ/cmpxchg-02.ll
+++ test/CodeGen/SystemZ/cmpxchg-02.ll
@@ -89,3 +89,82 @@
   ret i32 %res
 }
 
+declare void @g()
+
+; Check using the comparison result for a branch: @g is tail-called only
+; when the compare-and-swap succeeds.
+; CHECK-MAIN-LABEL: f4:
+; CHECK-MAIN: risbg [[RISBG:%r[1-9]+]], %r2, 0, 189, 0{{$}}
+; CHECK-MAIN-DAG: sll %r2, 3
+; CHECK-MAIN-DAG: l [[OLD:%r[0-9]+]], 0([[RISBG]])
+; CHECK-MAIN: [[LOOP:\.[^ ]*]]:
+; CHECK-MAIN: rll [[TMP:%r[0-9]+]], [[OLD]], 16(%r2)
+; CHECK-MAIN: risbg %r3, [[TMP]], 32, 47, 0
+; CHECK-MAIN: cr [[TMP]], %r3
+; CHECK-MAIN: jlh [[EXIT:\.[^ ]*]]
+; CHECK-MAIN: risbg %r4, [[TMP]], 32, 47, 0
+; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r4, -16({{%r[1-9]+}})
+; CHECK-MAIN: cs [[OLD]], [[NEW]], 0([[RISBG]])
+; CHECK-MAIN: jl [[LOOP]]
+; CHECK-MAIN: [[EXIT]]:
+; CHECK-MAIN-NEXT: jlh [[LABEL:\.[^ ]*]]
+; CHECK-MAIN: jg g
+; CHECK-MAIN: [[LABEL]]:
+; CHECK-MAIN: br %r14
+;
+; CHECK-SHIFT-LABEL: f4:
+; CHECK-SHIFT: sll %r2, 3
+; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], %r2
+; CHECK-SHIFT: rll
+; CHECK-SHIFT: rll {{%r[0-9]+}}, %r4, -16([[NEGSHIFT]])
+define void @f4(i16 *%src, i16 %cmp, i16 %swap) {
+  %pair = cmpxchg i16 *%src, i16 %cmp, i16 %swap seq_cst seq_cst
+  %cond = extractvalue { i16, i1 } %pair, 1  ; i1 success flag of the cmpxchg
+  br i1 %cond, label %call, label %exit      ; only a successful swap reaches %call
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ... and the same with the inverted direction (@g is called on failure).
+; CHECK-MAIN-LABEL: f5:
+; CHECK-MAIN: risbg [[RISBG:%r[1-9]+]], %r2, 0, 189, 0{{$}}
+; CHECK-MAIN-DAG: sll %r2, 3
+; CHECK-MAIN-DAG: l [[OLD:%r[0-9]+]], 0([[RISBG]])
+; CHECK-MAIN: [[LOOP:\.[^ ]*]]:
+; CHECK-MAIN: rll [[TMP:%r[0-9]+]], [[OLD]], 16(%r2)
+; CHECK-MAIN: risbg %r3, [[TMP]], 32, 47, 0
+; CHECK-MAIN: cr [[TMP]], %r3
+; CHECK-MAIN: jlh [[EXIT:\.[^ ]*]]
+; CHECK-MAIN: risbg %r4, [[TMP]], 32, 47, 0
+; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r4, -16({{%r[1-9]+}})
+; CHECK-MAIN: cs [[OLD]], [[NEW]], 0([[RISBG]])
+; CHECK-MAIN: jl [[LOOP]]
+; CHECK-MAIN: [[EXIT]]:
+; CHECK-MAIN-NEXT: jlh [[LABEL:\.[^ ]*]]
+; CHECK-MAIN: br %r14
+; CHECK-MAIN: [[LABEL]]:
+; CHECK-MAIN: jg g
+;
+; CHECK-SHIFT-LABEL: f5:
+; CHECK-SHIFT: sll %r2, 3
+; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], %r2
+; CHECK-SHIFT: rll
+; CHECK-SHIFT: rll {{%r[0-9]+}}, %r4, -16([[NEGSHIFT]])
+define void @f5(i16 *%src, i16 %cmp, i16 %swap) {
+  %pair = cmpxchg i16 *%src, i16 %cmp, i16 %swap seq_cst seq_cst
+  %cond = extractvalue { i16, i1 } %pair, 1  ; i1 success flag of the cmpxchg
+  br i1 %cond, label %exit, label %call      ; only a failed swap reaches %call
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
Index: test/CodeGen/SystemZ/cmpxchg-03.ll
===================================================================
--- test/CodeGen/SystemZ/cmpxchg-03.ll
+++ test/CodeGen/SystemZ/cmpxchg-03.ll
@@ -155,3 +155,42 @@
   %res = zext i1 %val to i32
   ret i32 %res
 }
+
+declare void @g()
+
+; Check using the comparison result for a branch (@g is called on success).
+; CHECK-LABEL: f14:
+; CHECK: cs %r2, %r3, 0(%r4)
+; CHECK-NEXT: jge g
+; CHECK: br %r14
+define void @f14(i32 %cmp, i32 %swap, i32 *%src) {
+  %pairval = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst seq_cst
+  %cond = extractvalue { i32, i1 } %pairval, 1  ; i1 success flag of the cmpxchg
+  br i1 %cond, label %call, label %exit         ; only a successful swap reaches %call
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ... and the same with the inverted direction (@g is called on failure).
+; CHECK-LABEL: f15:
+; CHECK: cs %r2, %r3, 0(%r4)
+; CHECK-NEXT: jgl g
+; CHECK: br %r14
+define void @f15(i32 %cmp, i32 %swap, i32 *%src) {
+  %pairval = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst seq_cst
+  %cond = extractvalue { i32, i1 } %pairval, 1  ; i1 success flag of the cmpxchg
+  br i1 %cond, label %exit, label %call         ; only a failed swap reaches %call
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
Index: test/CodeGen/SystemZ/cmpxchg-04.ll
===================================================================
--- test/CodeGen/SystemZ/cmpxchg-04.ll
+++ test/CodeGen/SystemZ/cmpxchg-04.ll
@@ -120,3 +120,41 @@
   ret i32 %res
 }
 
+declare void @g()
+
+; Check using the comparison result for a branch (@g is called on success).
+; CHECK-LABEL: f11:
+; CHECK: csg %r2, %r3, 0(%r4)
+; CHECK-NEXT: jge g
+; CHECK: br %r14
+define void @f11(i64 %cmp, i64 %swap, i64 *%src) {
+  %pairval = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst seq_cst
+  %cond = extractvalue { i64, i1 } %pairval, 1  ; i1 success flag of the cmpxchg
+  br i1 %cond, label %call, label %exit         ; only a successful swap reaches %call
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ... and the same with the inverted direction (@g is called on failure).
+; CHECK-LABEL: f12:
+; CHECK: csg %r2, %r3, 0(%r4)
+; CHECK-NEXT: jgl g
+; CHECK: br %r14
+define void @f12(i64 %cmp, i64 %swap, i64 *%src) {
+  %pairval = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst seq_cst
+  %cond = extractvalue { i64, i1 } %pairval, 1  ; i1 success flag of the cmpxchg
+  br i1 %cond, label %exit, label %call         ; only a failed swap reaches %call
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
Index: test/CodeGen/SystemZ/cmpxchg-06.ll
===================================================================
--- test/CodeGen/SystemZ/cmpxchg-06.ll
+++ test/CodeGen/SystemZ/cmpxchg-06.ll
@@ -129,3 +129,54 @@
   %res = zext i1 %val to i32
   ret i32 %res
 }
+
+declare void @g()
+
+; Check using the comparison result for a branch (@g is called on success).
+; CHECK-LABEL: f11:
+; CHECK-DAG: lg %r1, 8(%r3)
+; CHECK-DAG: lg %r0, 0(%r3)
+; CHECK-DAG: lg %r13, 8(%r2)
+; CHECK-DAG: lg %r12, 0(%r2)
+; CHECK:     cdsg %r12, %r0, 0(%r4)
+; CHECK-NEXT: jl [[LABEL:\.[^ ]*]]
+; CHECK: jg g
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+define void @f11(i128 %cmp, i128 %swap, i128 *%src) {
+  %pairval = cmpxchg i128 *%src, i128 %cmp, i128 %swap seq_cst seq_cst
+  %cond = extractvalue { i128, i1 } %pairval, 1  ; i1 success flag of the cmpxchg
+  br i1 %cond, label %call, label %exit          ; only a successful swap reaches %call
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ... and the same with the inverted direction (@g is called on failure).
+; CHECK-LABEL: f12:
+; CHECK-DAG: lg %r1, 8(%r3)
+; CHECK-DAG: lg %r0, 0(%r3)
+; CHECK-DAG: lg %r13, 8(%r2)
+; CHECK-DAG: lg %r12, 0(%r2)
+; CHECK:     cdsg %r12, %r0, 0(%r4)
+; CHECK-NEXT: jl [[LABEL:\.[^ ]*]]
+; CHECK: br %r14
+; CHECK: [[LABEL]]:
+; CHECK: jg g
+define void @f12(i128 %cmp, i128 %swap, i128 *%src) {
+  %pairval = cmpxchg i128 *%src, i128 %cmp, i128 %swap seq_cst seq_cst
+  %cond = extractvalue { i128, i1 } %pairval, 1  ; i1 success flag of the cmpxchg
+  br i1 %cond, label %exit, label %call          ; only a failed swap reaches %call
+
+call:
+  tail call void @g()
+  br label %exit
+
+exit:
+  ret void
+}
+
Index: test/CodeGen/SystemZ/dag-combine-02.ll
===================================================================
--- test/CodeGen/SystemZ/dag-combine-02.ll
+++ test/CodeGen/SystemZ/dag-combine-02.ll
@@ -94,8 +94,8 @@
 
 ; <label>:61:                                     ; preds = %13
 ; CHECK-LABEL: %bb.6:
-; CHECK: stgrl   %r1, g_56
-; CHECK: llhrl   %r1, g_56+6
+; CHECK: stgrl   %r0, g_56
+; CHECK: llhrl   %r0, g_56+6
 ; CHECK: stgrl   %r2, g_56
   store i64 0, i64* @g_56, align 8
   %62 = bitcast [4 x [7 x i16*]]* %3 to i8*
Index: test/CodeGen/SystemZ/vec-intrinsics-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-intrinsics-01.ll
+++ test/CodeGen/SystemZ/vec-intrinsics-01.ll
@@ -1736,9 +1736,8 @@
 define i32 @test_vceqbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test_vceqbs_any_bool:
 ; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm %r2
-; CHECK: afi %r2, -536870912
-; CHECK: srl %r2, 31
+; CHECK: lhi %r2, 0
+; CHECK: lochile %r2, 1
 ; CHECK: br %r14
   %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
   %res = extractvalue {<16 x i8>, i32} %call, 1
@@ -1785,8 +1784,8 @@
 define i32 @test_vceqhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vceqhs_notall_bool:
 ; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: lhi %r2, 0
+; CHECK: lochinhe %r2, 1
 ; CHECK: br %r14
   %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b)
   %res = extractvalue {<8 x i16>, i32} %call, 1
@@ -1834,8 +1833,8 @@
 define i32 @test_vceqfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vceqfs_none_bool:
 ; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: lhi %r2, 0
+; CHECK: lochio %r2, 1
 ; CHECK: br %r14
   %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b)
   %res = extractvalue {<4 x i32>, i32} %call, 1
@@ -1883,9 +1882,8 @@
 define i32 @test_vceqgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vceqgs_all_bool:
 ; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm %r2
-; CHECK: afi %r2, -268435456
-; CHECK: srl %r2, 31
+; CHECK: lhi %r2, 0
+; CHECK: lochie %r2, 1
 ; CHECK: br %r14
   %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b)
   %res = extractvalue {<2 x i64>, i32} %call, 1
@@ -1932,9 +1930,8 @@
 define i32 @test_vchbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test_vchbs_any_bool:
 ; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm %r2
-; CHECK: afi %r2, -536870912
-; CHECK: srl %r2, 31
+; CHECK: lhi %r2, 0
+; CHECK: lochile %r2, 1
 ; CHECK: br %r14
   %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b)
   %res = extractvalue {<16 x i8>, i32} %call, 1
@@ -1981,8 +1978,8 @@
 define i32 @test_vchhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vchhs_notall_bool:
 ; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: lhi %r2, 0
+; CHECK: lochinhe %r2, 1
 ; CHECK: br %r14
   %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b)
   %res = extractvalue {<8 x i16>, i32} %call, 1
@@ -2030,8 +2027,8 @@
 define i32 @test_vchfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vchfs_none_bool:
 ; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: lhi %r2, 0
+; CHECK: lochio %r2, 1
 ; CHECK: br %r14
   %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b)
   %res = extractvalue {<4 x i32>, i32} %call, 1
@@ -2078,9 +2075,8 @@
 define i32 @test_vchgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vchgs_all_bool:
 ; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm %r2
-; CHECK: afi %r2, -268435456
-; CHECK: srl %r2, 31
+; CHECK: lhi %r2, 0
+; CHECK: lochie %r2, 1
 ; CHECK: br %r14
   %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b)
   %res = extractvalue {<2 x i64>, i32} %call, 1
@@ -2127,9 +2123,8 @@
 define i32 @test_vchlbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test_vchlbs_any_bool:
 ; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm %r2
-; CHECK: afi %r2, -536870912
-; CHECK: srl %r2, 31
+; CHECK: lhi %r2, 0
+; CHECK: lochile %r2, 1
 ; CHECK: br %r14
   %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b)
   %res = extractvalue {<16 x i8>, i32} %call, 1
@@ -2176,8 +2171,8 @@
 define i32 @test_vchlhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vchlhs_notall_bool:
 ; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: lhi %r2, 0
+; CHECK: lochinhe %r2, 1
 ; CHECK: br %r14
   %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b)
   %res = extractvalue {<8 x i16>, i32} %call, 1
@@ -2225,8 +2220,8 @@
 define i32 @test_vchlfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vchlfs_none_bool:
 ; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: lhi %r2, 0
+; CHECK: lochio %r2, 1
 ; CHECK: br %r14
   %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b)
   %res = extractvalue {<4 x i32>, i32} %call, 1
@@ -2274,9 +2269,8 @@
 define i32 @test_vchlgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vchlgs_all_bool:
 ; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm %r2
-; CHECK: afi %r2, -268435456
-; CHECK: srl %r2, 31
+; CHECK: lhi %r2, 0
+; CHECK: lochie %r2, 1
 ; CHECK: br %r14
   %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b)
   %res = extractvalue {<2 x i64>, i32} %call, 1
@@ -3148,9 +3142,8 @@
 define i32 @test_vfcedbs_any_bool(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vfcedbs_any_bool:
 ; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm %r2
-; CHECK: afi %r2, -536870912
-; CHECK: srl %r2, 31
+; CHECK: lhi %r2, 0
+; CHECK: lochile %r2, 1
 ; CHECK: br %r14
   %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a,
                                                    <2 x double> %b)
@@ -3201,8 +3194,8 @@
 define i32 @test_vfchdbs_notall_bool(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vfchdbs_notall_bool:
 ; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: lhi %r2, 0
+; CHECK: lochinhe %r2, 1
 ; CHECK: br %r14
   %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a,
                                                    <2 x double> %b)
@@ -3253,8 +3246,8 @@
 define i32 @test_vfchedbs_none_bool(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vfchedbs_none_bool:
 ; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: lhi %r2, 0
+; CHECK: lochio %r2, 1
 ; CHECK: br %r14
   %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a,
 						    <2 x double> %b)
@@ -3305,8 +3298,8 @@
 define i32 @test_vftcidb_all_bool(<2 x double> %a) {
 ; CHECK-LABEL: test_vftcidb_all_bool:
 ; CHECK: vftcidb {{%v[0-9]+}}, %v24, 4094
-; CHECK: afi %r2, -268435456
-; CHECK: srl %r2, 31
+; CHECK: lhi %r2, 0
+; CHECK: lochie %r2, 1
 ; CHECK: br %r14
   %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 4094)
   %res = extractvalue {<2 x i64>, i32} %call, 1
Index: test/CodeGen/SystemZ/vec-intrinsics-02.ll
===================================================================
--- test/CodeGen/SystemZ/vec-intrinsics-02.ll
+++ test/CodeGen/SystemZ/vec-intrinsics-02.ll
@@ -218,9 +218,8 @@
 define i32 @test_vfcesbs_any_bool(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: test_vfcesbs_any_bool:
 ; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm %r2
-; CHECK: afi %r2, -536870912
-; CHECK: srl %r2, 31
+; CHECK: lhi %r2, 0
+; CHECK: lochile %r2, 1
 ; CHECK: br %r14
   %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
                                                    <4 x float> %b)
@@ -271,8 +270,8 @@
 define i32 @test_vfchsbs_notall_bool(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: test_vfchsbs_notall_bool:
 ; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: lhi %r2, 0
+; CHECK: lochinhe %r2, 1
 ; CHECK: br %r14
   %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
                                                    <4 x float> %b)
@@ -323,8 +322,8 @@
 define i32 @test_vfchesbs_none_bool(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: test_vfchesbs_none_bool:
 ; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
-; CHECK: ipm [[REG:%r[0-5]]]
-; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: lhi %r2, 0
+; CHECK: lochio %r2, 1
 ; CHECK: br %r14
   %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
 						    <4 x float> %b)
@@ -375,8 +374,8 @@
 define i32 @test_vftcisb_all_bool(<4 x float> %a) {
 ; CHECK-LABEL: test_vftcisb_all_bool:
 ; CHECK: vftcisb {{%v[0-9]+}}, %v24, 4094
-; CHECK: afi %r2, -268435456
-; CHECK: srl %r2, 31
+; CHECK: lhi %r2, 0
+; CHECK: lochie %r2, 1
 ; CHECK: br %r14
   %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 4094)
   %res = extractvalue {<4 x i32>, i32} %call, 1