Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -3436,7 +3436,9 @@
 may be any of the :ref:`binary <binaryops>` or :ref:`bitwise binary
 <bitwiseops>` operations. The constraints on operands are the same as
 those for the corresponding instruction (e.g. no bitwise
-operations on floating-point values are allowed).
+operations on floating-point values are allowed). Division or remainder
+by zero and overflowing signed division produce poison (unlike the
+division and remainder instructions, which have undefined behavior).
 
 Other Values
 ============
Index: docs/ReleaseNotes.rst
===================================================================
--- docs/ReleaseNotes.rst
+++ docs/ReleaseNotes.rst
@@ -72,6 +72,10 @@
   pointee type. In the next release we intend to make this parameter
   mandatory in preparation for opaque pointer types.
 
+* The semantics of constant expressions have changed: it is no longer
+  possible for a constant expression to have undefined behavior. The
+  ``Constant::canTrap()`` C++ API has been removed.
+
 Changes to the ARM Backend
 --------------------------
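For illustration, a minimal sketch of the new semantics (the global @g is
hypothetical, not from the patch): a division constant expression whose
divisor is not known to be non-zero now simply yields poison, so it is safe
to evaluate unconditionally.

  @g = extern_weak global i8

  define i32 @example(i1 %c) {
    ; If @g resolves to address zero, the constant expression below is
    ; poison, and %r is poison only when %c is true. Previously, merely
    ; materializing the division could trap at run time.
    %r = select i1 %c, i32 udiv (i32 1, i32 ptrtoint (i8* @g to i32)), i32 0
    ret i32 %r
  }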
Index: include/llvm/Analysis/ValueTracking.h
===================================================================
--- include/llvm/Analysis/ValueTracking.h
+++ include/llvm/Analysis/ValueTracking.h
@@ -388,7 +388,7 @@
   ///
   /// This method can return true for instructions that read memory;
   /// for such instructions, moving them may change the resulting value.
-  bool isSafeToSpeculativelyExecute(const Value *V,
+  bool isSafeToSpeculativelyExecute(const Instruction *I,
                                     const Instruction *CtxI = nullptr,
                                     const DominatorTree *DT = nullptr);
Index: include/llvm/CodeGen/GlobalISel/IRTranslator.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -337,15 +337,27 @@
   }
   bool translateUDiv(const User &U, MachineIRBuilder &MIRBuilder) {
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(U))
+      return false;
     return translateBinaryOp(TargetOpcode::G_UDIV, U, MIRBuilder);
   }
   bool translateSDiv(const User &U, MachineIRBuilder &MIRBuilder) {
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(U))
+      return false;
     return translateBinaryOp(TargetOpcode::G_SDIV, U, MIRBuilder);
   }
   bool translateURem(const User &U, MachineIRBuilder &MIRBuilder) {
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(U))
+      return false;
     return translateBinaryOp(TargetOpcode::G_UREM, U, MIRBuilder);
   }
   bool translateSRem(const User &U, MachineIRBuilder &MIRBuilder) {
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(U))
+      return false;
     return translateBinaryOp(TargetOpcode::G_SREM, U, MIRBuilder);
   }
   bool translateIntToPtr(const User &U, MachineIRBuilder &MIRBuilder) {
Index: include/llvm/IR/Constant.h
===================================================================
--- include/llvm/IR/Constant.h
+++ include/llvm/IR/Constant.h
@@ -94,10 +94,6 @@
   /// expressions.
   bool containsConstantExpression() const;
 
-  /// Return true if evaluation of this constant could trap. This is true for
-  /// things like constant expressions that could divide by zero.
-  bool canTrap() const;
-
   /// Return true if the value can vary between threads.
   bool isThreadDependent() const;
Index: include/llvm/IR/Constants.h
===================================================================
--- include/llvm/IR/Constants.h
+++ include/llvm/IR/Constants.h
@@ -1246,6 +1246,9 @@
   /// Returns an Instruction which implements the same operation as this
   /// ConstantExpr. The instruction is not linked to any basic block.
   ///
+  /// For division operations, the denominator may be rewritten to avoid
+  /// generating a division which would trap.
+  ///
   /// A better approach to this could be to have a constructor for Instruction
   /// which would take a ConstantExpr parameter, but that would have spread
   /// implementation details of ConstantExpr outside of Constants.cpp, which
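To illustrate the rewrite that the updated getAsInstruction() comment
describes (an explanatory sketch with placeholder i32 constants N and D, not
patch content): for an unsigned division constant expression, the denominator
of the materialized instruction is clamped with further constant expressions
so the emitted udiv cannot trap:

  ; udiv (i32 N, i32 D) is materialized roughly as:
  %q = udiv i32 N, select (i1 icmp eq (i32 D, i32 0), i32 1, i32 D)

If D is in fact zero the original constant expression is poison anyway, so
dividing by 1 instead is a valid refinement.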
Index: lib/Analysis/CodeMetrics.cpp
===================================================================
--- lib/Analysis/CodeMetrics.cpp
+++ lib/Analysis/CodeMetrics.cpp
@@ -34,7 +34,8 @@
 
     for (const Value *Operand : U->operands())
       if (Visited.insert(Operand).second)
-        if (isSafeToSpeculativelyExecute(Operand))
+        if (!isa<Instruction>(Operand) ||
+            isSafeToSpeculativelyExecute(cast<Instruction>(Operand)))
           Worklist.push_back(Operand);
 }
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -485,7 +485,8 @@
     if (V == E)
       return true;
 
-    if (V == I || isSafeToSpeculativelyExecute(V)) {
+    if (V == I || !isa<Instruction>(V) ||
+        isSafeToSpeculativelyExecute(cast<Instruction>(V))) {
       EphValues.insert(V);
       if (const User *U = dyn_cast<User>(V))
         for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
@@ -3895,18 +3896,9 @@
   return true;
 }
 
-bool llvm::isSafeToSpeculativelyExecute(const Value *V,
+bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
                                         const Instruction *CtxI,
                                         const DominatorTree *DT) {
-  const Operator *Inst = dyn_cast<Operator>(V);
-  if (!Inst)
-    return false;
-
-  for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
-    if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
-      if (C->canTrap())
-        return false;
-
   switch (Inst->getOpcode()) {
   default:
     return true;
Index: lib/CodeGen/SelectionDAG/FastISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/FastISel.cpp
+++ lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1817,14 +1817,26 @@
   case Instruction::FMul:
     return selectBinaryOp(I, ISD::FMUL);
   case Instruction::SDiv:
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(I))
+      return false;
    return selectBinaryOp(I, ISD::SDIV);
   case Instruction::UDiv:
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(I))
+      return false;
    return selectBinaryOp(I, ISD::UDIV);
   case Instruction::FDiv:
     return selectBinaryOp(I, ISD::FDIV);
   case Instruction::SRem:
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(I))
+      return false;
    return selectBinaryOp(I, ISD::SREM);
   case Instruction::URem:
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(I))
+      return false;
    return selectBinaryOp(I, ISD::UREM);
   case Instruction::FRem:
     return selectBinaryOp(I, ISD::FREM);
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -878,17 +878,18 @@
 
   void visitBinary(const User &I, unsigned Opcode);
   void visitShift(const User &I, unsigned Opcode);
+  void visitDivRem(const User &I, unsigned Opcode);
   void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
   void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
   void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
   void visitFSub(const User &I);
   void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
   void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
-  void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
-  void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+  void visitURem(const User &I) { visitDivRem(I, ISD::UREM); }
+  void visitSRem(const User &I) { visitDivRem(I, ISD::SREM); }
   void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
-  void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
-  void visitSDiv(const User &I);
+  void visitUDiv(const User &I) { visitDivRem(I, ISD::UDIV); }
+  void visitSDiv(const User &I) { visitDivRem(I, ISD::SDIV); }
   void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
   void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
   void visitOr  (const User &I) { visitBinary(I, ISD::OR); }
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3206,15 +3206,48 @@
   setValue(&I, Res);
 }
 
-void SelectionDAGBuilder::visitSDiv(const User &I) {
+void SelectionDAGBuilder::visitDivRem(const User &I, unsigned Opcode) {
+  if (!isa<ConstantExpr>(I))
+    return visitBinary(I, Opcode);
+
+  // Constants aren't allowed to trap, so we have to do something
+  // a bit trickier.
+  //
+  // FIXME: Some targets have a cheap non-trapping div.
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
-
-  SDNodeFlags Flags;
-  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
-                 cast<PossiblyExactOperator>(&I)->isExact());
-  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
-                           Op2, Flags));
+  SDLoc dl(getCurSDLoc());
+  EVT VT = Op1.getValueType();
+  if (Opcode == ISD::UDIV || Opcode == ISD::UREM) {
+    // Ensure the denominator is not zero.
+    Op2 = DAG.getNode(ISD::UMAX, dl, VT, Op2, DAG.getConstant(1, dl, VT));
+  } else {
+    // Ensure the denominator is not zero, and we are not dividing INT_MIN
+    // by -1.
+    auto &TLI = DAG.getTargetLoweringInfo();
+    EVT CCVT =
+        TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+    SDValue IsZero =
+        DAG.getSetCC(dl, CCVT, Op2, DAG.getConstant(0, dl, VT), ISD::SETEQ);
+    SDValue IsNegOne =
+        DAG.getSetCC(dl, CCVT, Op2, DAG.getAllOnesConstant(dl, VT), ISD::SETEQ);
+    auto IntMin = APInt::getSignedMinValue(VT.getScalarSizeInBits());
+    SDValue IsIntMin = DAG.getSetCC(
+        dl, CCVT, Op1, DAG.getConstant(IntMin, dl, VT), ISD::SETEQ);
+    SDValue IsIntMinOverNegOne =
+        DAG.getNode(ISD::AND, dl, CCVT, IsNegOne, IsIntMin);
+    SDValue IsInvalid =
+        DAG.getNode(ISD::OR, dl, CCVT, IsZero, IsIntMinOverNegOne);
+    ISD::NodeType SelectOpCode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
+    Op2 = DAG.getNode(SelectOpCode, dl, VT, IsInvalid,
+                      DAG.getConstant(1, dl, VT), Op2);
+  }
+
+  SDNodeFlags DivFlags;
+  if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
+    DivFlags.setExact(ExactOp->isExact());
+  SDValue BinNodeValue = DAG.getNode(Opcode, dl, VT, Op1, Op2, DivFlags);
+  setValue(&I, BinNodeValue);
 }
 
 void SelectionDAGBuilder::visitICmp(const User &I) {
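An IR-level picture of the DAG visitDivRem builds for a constant-expression
divide (illustrative only; %n and %d stand for the i32 operands):

  ; unsigned udiv/urem: UMAX(%d, 1) is equivalent to
  %is0    = icmp eq i32 %d, 0
  %d.safe = select i1 %is0, i32 1, i32 %d

  ; signed sdiv/srem: additionally guard against INT_MIN / -1
  %ism1   = icmp eq i32 %d, -1
  %ismin  = icmp eq i32 %n, -2147483648
  %ovf    = and i1 %ism1, %ismin
  %bad    = or i1 %is0, %ovf
  %d.sdiv = select i1 %bad, i32 1, i32 %d

Replacing the divisor is sound because the original expression is poison in
exactly the cases where the select picks 1.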
Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -342,47 +342,6 @@
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
-/// may trap on it. In this case we have to split the edge so that the path
-/// through the predecessor block that doesn't go to the phi block doesn't
-/// execute the possibly trapping instruction. If available, we pass domtree
-/// and loop info to be updated when we split critical edges. This is because
-/// SelectionDAGISel preserves these analyses.
-/// This is required for correctness, so it must be done at -O0.
-///
-static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
-                                         LoopInfo *LI) {
-  // Loop for blocks with phi nodes.
-  for (BasicBlock &BB : Fn) {
-    PHINode *PN = dyn_cast<PHINode>(BB.begin());
-    if (!PN) continue;
-
-  ReprocessBlock:
-    // For each block with a PHI node, check to see if any of the input values
-    // are potentially trapping constant expressions. Constant expressions are
-    // the only potentially trapping value that can occur as the argument to a
-    // PHI.
-    for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I)
-      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-        ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
-        if (!CE || !CE->canTrap()) continue;
-
-        // The only case we have to worry about is when the edge is critical.
-        // Since this block has a PHI Node, we assume it has multiple input
-        // edges: check to see if the pred has multiple successors.
-        BasicBlock *Pred = PN->getIncomingBlock(i);
-        if (Pred->getTerminator()->getNumSuccessors() == 1)
-          continue;
-
-        // Okay, we have to split this edge.
-        SplitCriticalEdge(
-            Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
-            CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges());
-        goto ReprocessBlock;
-      }
-  }
-}
-
 static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
                                          MachineModuleInfo &MMI) {
   // Only needed for MSVC
@@ -437,15 +396,9 @@
   LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
   ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
-  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
-  LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
 
   LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
 
-  SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
-
   CurDAG->init(*MF, *ORE, this, LibInfo,
                getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
   FuncInfo->set(Fn, *MF, CurDAG);
Index: lib/IR/Constants.cpp
===================================================================
--- lib/IR/Constants.cpp
+++ lib/IR/Constants.cpp
@@ -408,42 +408,6 @@
   delete this;
 }
 
-static bool canTrapImpl(const Constant *C,
-                        SmallPtrSetImpl<const ConstantExpr *> &NonTrappingOps) {
-  assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
-  // The only thing that could possibly trap are constant exprs.
-  const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
-  if (!CE)
-    return false;
-
-  // ConstantExpr traps if any operands can trap.
-  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
-    if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) {
-      if (NonTrappingOps.insert(Op).second && canTrapImpl(Op, NonTrappingOps))
-        return true;
-    }
-  }
-
-  // Otherwise, only specific operations can trap.
-  switch (CE->getOpcode()) {
-  default:
-    return false;
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::URem:
-  case Instruction::SRem:
-    // Div and rem can trap if the RHS is not known to be non-zero.
-    if (!isa<ConstantInt>(CE->getOperand(1)) || CE->getOperand(1)->isNullValue())
-      return true;
-    return false;
-  }
-}
-
-bool Constant::canTrap() const {
-  SmallPtrSet<const ConstantExpr *, 4> NonTrappingOps;
-  return canTrapImpl(this, NonTrappingOps);
-}
-
 /// Check if C contains a GlobalValue for which Predicate is true.
 static bool
 ConstHasGlobalValuePredicate(const Constant *C,
@@ -2992,7 +2956,6 @@
     return ExtractValueInst::Create(Ops[0], getIndices());
   case Instruction::ShuffleVector:
     return new ShuffleVectorInst(Ops[0], Ops[1], Ops[2]);
-
   case Instruction::GetElementPtr: {
     const auto *GO = cast<GEPOperator>(this);
     if (GO->isInBounds())
@@ -3009,9 +2972,37 @@
     return UnaryOperator::Create((Instruction::UnaryOps)getOpcode(), Ops[0]);
   default:
     assert(getNumOperands() == 2 && "Must be binary operator?");
+    Constant *Op0 = getOperand(0);
+    Constant *Op1 = getOperand(1);
+    if (getOpcode() == Instruction::UDiv || getOpcode() == Instruction::URem) {
+      // Ensure the denominator is not zero.
+      Constant *Zero = Constant::getNullValue(getType());
+      Constant *One = ConstantInt::get(getType(), 1);
+      Constant *IsZero = ConstantExpr::getICmp(CmpInst::ICMP_EQ, Op1, Zero);
+      Op1 = ConstantExpr::getSelect(IsZero, One, Op1);
+    }
+    if (getOpcode() == Instruction::SDiv || getOpcode() == Instruction::SRem) {
+      // Ensure the denominator is not zero, and we are not dividing INT_MIN
+      // by -1.
+      unsigned BitWidth = getType()->getScalarSizeInBits();
+      assert(BitWidth != 1 && "One-bit divide should be folded away");
+      Constant *Zero = Constant::getNullValue(getType());
+      Constant *NegOne = Constant::getAllOnesValue(getType());
+      Constant *One = ConstantInt::get(getType(), 1);
+      Constant *SignedMin =
+          ConstantInt::get(getType(), APInt::getSignedMinValue(BitWidth));
+      Constant *IsZero =
+          ConstantExpr::getICmp(CmpInst::ICMP_EQ, Op1, Zero);
+      Constant *IsNegOne =
+          ConstantExpr::getICmp(CmpInst::ICMP_EQ, Op1, NegOne);
+      Constant *IsIntMin =
+          ConstantExpr::getICmp(CmpInst::ICMP_EQ, Op0, SignedMin);
+      Constant *IsOverflowing = ConstantExpr::getAnd(IsNegOne, IsIntMin);
+      Constant *IsUndefined = ConstantExpr::getOr(IsOverflowing, IsZero);
+      Op1 = ConstantExpr::getSelect(IsUndefined, One, Op1);
+    }
     BinaryOperator *BO =
-      BinaryOperator::Create((Instruction::BinaryOps)getOpcode(),
-                             Ops[0], Ops[1]);
+        BinaryOperator::Create((Instruction::BinaryOps)getOpcode(), Op0, Op1);
     if (isa<OverflowingBinaryOperator>(BO)) {
       BO->setHasNoUnsignedWrap(SubclassOptionalData &
                                OverflowingBinaryOperator::NoUnsignedWrap);
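For the signed case, the guards built above fold into the divisor operand as
a single constant-expression select. A sketch with placeholder i32 constants
N and D (not patch content):

  sdiv i32 N,
       select (i1 or (i1 and (i1 icmp eq (i32 D, i32 -1),
                              i1 icmp eq (i32 N, i32 -2147483648)),
                      i1 icmp eq (i32 D, i32 0)),
               i32 1, i32 D)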
Index: lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyCFG.cpp
+++ lib/Transforms/Utils/SimplifyCFG.cpp
@@ -308,7 +308,8 @@
 /// expensive.
 static unsigned ComputeSpeculationCost(const User *I,
                                        const TargetTransformInfo &TTI) {
-  assert(isSafeToSpeculativelyExecute(I) &&
+  assert(!isa<Instruction>(I) ||
+         isSafeToSpeculativelyExecute(cast<Instruction>(I)) &&
          "Instruction is not safe to speculatively execute!");
   return TTI.getUserCost(I);
 }
@@ -343,14 +344,8 @@
     return false;
 
   Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) {
-    // Non-instructions all dominate instructions, but not all constantexprs
-    // can be executed unconditionally.
-    if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
-      if (C->canTrap())
-        return false;
+  if (!I)
     return true;
-  }
   BasicBlock *PBB = I->getParent();
 
   // We don't want to allow weird loops that might have the "if condition" in
@@ -1378,11 +1373,6 @@
       if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
           passingValueIsAlwaysUndefined(BB2V, &PN))
         return Changed;
-
-      if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
-        return Changed;
-      if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
-        return Changed;
     }
   }
@@ -2056,9 +2046,6 @@
     if (!OrigCE && !ThenCE)
       continue; // Known safe and cheap.
 
-    if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
-        (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
-      return false;
     unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
     unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
     unsigned MaxCost =
@@ -2467,18 +2454,6 @@
     if (FVPN->getParent() == FalseSucc)
       FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
 
-  // In order for this transformation to be safe, we must be able to
-  // unconditionally execute both operands to the return. This is
-  // normally the case, but we could have a potentially-trapping
-  // constant expression that prevents this transformation from being
-  // safe.
-  if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
-    if (TCV->canTrap())
-      return false;
-  if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
-    if (FCV->canTrap())
-      return false;
-
   // Okay, we collected all the mapped values and checked them for sanity, and
   // defined to really do this transformation. First, update the CFG.
   TrueSucc->removePredecessor(BI->getParent());
@@ -2634,15 +2609,6 @@
     return false;
   }
 
-  // Cond is known to be a compare or binary operator. Check to make sure that
-  // neither operand is a potentially-trapping constant expression.
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
-    if (CE->canTrap())
-      return false;
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
-    if (CE->canTrap())
-      return false;
-
   // Finally, don't infinitely unroll conditional loops.
   BasicBlock *TrueDest = BI->getSuccessor(0);
   BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
@@ -3244,10 +3210,6 @@
     }
   }
 
-  if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
-    if (CE->canTrap())
-      return false;
-
   // If both branches are conditional and both contain stores to the same
   // address, remove the stores from the conditionals and create a conditional
   // merged store at the end.
@@ -3288,29 +3250,12 @@
   // Do not perform this transformation if it would require
   // insertion of a large number of select instructions. For targets
   // without predication/cmovs, this is a big pessimization.
-
-  // Also do not perform this transformation if any phi node in the common
-  // destination block can trap when reached by BB or PBB (PR17073). In that
-  // case, it would be unsafe to hoist the operation into a select instruction.
-
   BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
   unsigned NumPhis = 0;
   for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
        ++II, ++NumPhis) {
     if (NumPhis > 2) // Disable this xform.
       return false;
-
-    PHINode *PN = cast<PHINode>(II);
-    Value *BIV = PN->getIncomingValueForBlock(BB);
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
-      if (CE->canTrap())
-        return false;
-
-    unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
-    Value *PBIV = PN->getIncomingValue(PBBIdx);
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
-      if (CE->canTrap())
-        return false;
   }
 
   // Finally, if everything is ok, fold the branches to logical ops.
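With the canTrap() checks gone, SimplifyCFG can treat a division constant
expression like any other constant when speculating values. A sketch of a
fold that is now permitted (@g1/@g2 as in the tests below):

  ; before
  entry:
    br i1 %c, label %join, label %other
  other:
    br label %join
  join:
    %p = phi i32 [ sdiv (i32 ptrtoint (i8* @g1 to i32),
                         i32 ptrtoint (i8* @g2 to i32)), %entry ],
                 [ 0, %other ]

  ; after: the constant expression is evaluated unconditionally
  entry:
    %p = select i1 %c, i32 sdiv (i32 ptrtoint (i8* @g1 to i32),
                                 i32 ptrtoint (i8* @g2 to i32)), i32 0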
Index: lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -378,20 +378,6 @@
   return true;
 }
 
-/// Check whether it is safe to if-convert this phi node.
-///
-/// Phi nodes with constant expressions that can trap are not safe to if
-/// convert.
-static bool canIfConvertPHINodes(BasicBlock *BB) {
-  for (PHINode &Phi : BB->phis()) {
-    for (Value *V : Phi.incoming_values())
-      if (auto *C = dyn_cast<Constant>(V))
-        if (C->canTrap())
-          return false;
-  }
-  return true;
-}
-
 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
   if (Ty->isPointerTy())
     return DL.getIntPtrType(Ty);
@@ -877,12 +863,6 @@
   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
 
   for (Instruction &I : *BB) {
-    // Check that we don't have a constant expression that can trap as operand.
-    for (Value *Operand : I.operands()) {
-      if (auto *C = dyn_cast<Constant>(Operand))
-        if (C->canTrap())
-          return false;
-    }
     // We might be able to hoist the load.
     if (I.mayReadFromMemory()) {
       auto *LI = dyn_cast<LoadInst>(&I);
@@ -941,7 +921,6 @@
   }
 
   // Collect the blocks that need predication.
-  BasicBlock *Header = TheLoop->getHeader();
   for (BasicBlock *BB : TheLoop->blocks()) {
     // We don't support switch statements inside loops.
     if (!isa<BranchInst>(BB->getTerminator())) {
@@ -960,12 +939,6 @@
           "NoCFGForSelect", BB->getTerminator());
       return false;
     }
-    } else if (BB != Header && !canIfConvertPHINodes(BB)) {
-      reportVectorizationFailure(
-          "Control flow cannot be substituted for a select",
-          "control flow cannot be substituted for a select",
-          "NoCFGForSelect", BB->getTerminator());
-      return false;
     }
   }
Index: test/CodeGen/X86/critical-edge-split-2.ll
===================================================================
--- test/CodeGen/X86/critical-edge-split-2.ll
+++ test/CodeGen/X86/critical-edge-split-2.ll
@@ -21,6 +21,8 @@
 ; CHECK-NEXT: cmpq %rax, %rcx
 ; CHECK-NEXT: sete %sil
 ; CHECK-NEXT: movl $1, %eax
+; CHECK-NEXT: cmovnel %eax, %esi
+; CHECK-NEXT: movl $1, %eax
 ; CHECK-NEXT: xorl %edx, %edx
 ; CHECK-NEXT: divl %esi
 ; CHECK-NEXT: movl %edx, %eax
Index: test/CodeGen/X86/divide-constant-expression.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/divide-constant-expression.ll
@@ -0,0 +1,390 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -verify-machineinstrs | FileCheck %s -check-prefix=SDAG
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel -verify-machineinstrs | FileCheck %s -check-prefix=FAST
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=0 -verify-machineinstrs | FileCheck %s -check-prefix=GLOBAL
+
+@g1 = extern_weak global i8
+@g2 = extern_weak global i8
+
+define i32 @test1(i1 %c) {
+; SDAG-LABEL: test1:
+; SDAG: # %bb.0: # %entry
+; SDAG-NEXT: movl $g1, %eax
+; SDAG-NEXT: xorl $-2147483648, %eax # imm = 0x80000000
+; SDAG-NEXT: movl $g2, %esi
+; SDAG-NEXT: movl $g2, %ecx
+; SDAG-NEXT: notl %ecx
+; SDAG-NEXT: orl %eax, %ecx
+; SDAG-NEXT: sete %al
+; SDAG-NEXT: testl %esi, %esi
+; SDAG-NEXT: sete %cl
+; SDAG-NEXT: orb %al, %cl
+; SDAG-NEXT: movl $1, %ecx
+; SDAG-NEXT: cmovnel %ecx, %esi
+; SDAG-NEXT: movl $g1, %eax
+; SDAG-NEXT: cltd
+; SDAG-NEXT: idivl %esi
+; SDAG-NEXT: testb $1, %dil
+; SDAG-NEXT: je .LBB0_2
+; SDAG-NEXT: # %bb.1:
+; SDAG-NEXT: movl %eax, %ecx
+; SDAG-NEXT: .LBB0_2: # %cond.end.i
+; SDAG-NEXT: movl %ecx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: test1:
+; FAST: # %bb.0: # %entry
+; FAST-NEXT: movl $g1, %eax
+; FAST-NEXT: xorl $-2147483648, %eax # imm = 0x80000000
+; FAST-NEXT: movl $g2, %esi
+; FAST-NEXT: movl $g2, %ecx
+; FAST-NEXT: notl %ecx
+; FAST-NEXT: orl %eax, %ecx
+; FAST-NEXT: sete %al
+; FAST-NEXT: testl %esi, %esi
+; FAST-NEXT: sete %cl
+; FAST-NEXT: orb %al, %cl
+; FAST-NEXT: movl $1, %ecx
+; FAST-NEXT: cmovnel %ecx, %esi
+; FAST-NEXT: movl $g1, %eax
+; FAST-NEXT: cltd
+; FAST-NEXT: idivl %esi
+; FAST-NEXT: testb $1, %dil
+; FAST-NEXT: je .LBB0_2
+; FAST-NEXT: # %bb.1:
+; FAST-NEXT: movl %eax, %ecx
+; FAST-NEXT: .LBB0_2: # %cond.end.i
+; FAST-NEXT: movl %ecx, %eax
+; FAST-NEXT: retq
+;
+; GLOBAL-LABEL: test1:
+; GLOBAL: # %bb.0: # %entry
+; GLOBAL-NEXT: movl $g1, %eax
+; GLOBAL-NEXT: xorl $-2147483648, %eax # imm = 0x80000000
+; GLOBAL-NEXT: movl $g2, %esi
+; GLOBAL-NEXT: movl $g2, %ecx
+; GLOBAL-NEXT: notl %ecx
+; GLOBAL-NEXT: orl %eax, %ecx
+; GLOBAL-NEXT: sete %al
+; GLOBAL-NEXT: testl %esi, %esi
+; GLOBAL-NEXT: sete %cl
+; GLOBAL-NEXT: orb %al, %cl
+; GLOBAL-NEXT: movl $1, %ecx
+; GLOBAL-NEXT: cmovnel %ecx, %esi
+; GLOBAL-NEXT: movl $g1, %eax
+; GLOBAL-NEXT: cltd
+; GLOBAL-NEXT: idivl %esi
+; GLOBAL-NEXT: testb $1, %dil
+; GLOBAL-NEXT: je .LBB0_2
+; GLOBAL-NEXT: # %bb.1:
+; GLOBAL-NEXT: movl %eax, %ecx
+; GLOBAL-NEXT:
.LBB0_2: # %cond.end.i +; GLOBAL-NEXT: movl %ecx, %eax +; GLOBAL-NEXT: retq +entry: + br i1 %c, label %cond.end.i, label %cond.false.i + +cond.false.i: + br label %cond.end.i + +cond.end.i: + %r = phi i32 [ sdiv (i32 ptrtoint (i8* @g1 to i32), i32 ptrtoint (i8* @g2 to i32)), %entry ], [ 1, %cond.false.i ] + ret i32 %r +} + +define i32 @test2(i1 %c) { +; SDAG-LABEL: test2: +; SDAG: # %bb.0: # %entry +; SDAG-NEXT: movl $g2, %esi +; SDAG-NEXT: cmpl $1, %esi +; SDAG-NEXT: movl $1, %ecx +; SDAG-NEXT: cmovbel %ecx, %esi +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: xorl %edx, %edx +; SDAG-NEXT: divl %esi +; SDAG-NEXT: testb $1, %dil +; SDAG-NEXT: je .LBB1_2 +; SDAG-NEXT: # %bb.1: +; SDAG-NEXT: movl %eax, %ecx +; SDAG-NEXT: .LBB1_2: # %cond.end.i +; SDAG-NEXT: movl %ecx, %eax +; SDAG-NEXT: retq +; +; FAST-LABEL: test2: +; FAST: # %bb.0: # %entry +; FAST-NEXT: movl $g2, %esi +; FAST-NEXT: cmpl $1, %esi +; FAST-NEXT: movl $1, %ecx +; FAST-NEXT: cmovbel %ecx, %esi +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: xorl %edx, %edx +; FAST-NEXT: divl %esi +; FAST-NEXT: testb $1, %dil +; FAST-NEXT: je .LBB1_2 +; FAST-NEXT: # %bb.1: +; FAST-NEXT: movl %eax, %ecx +; FAST-NEXT: .LBB1_2: # %cond.end.i +; FAST-NEXT: movl %ecx, %eax +; FAST-NEXT: retq +; +; GLOBAL-LABEL: test2: +; GLOBAL: # %bb.0: # %entry +; GLOBAL-NEXT: movl $g2, %esi +; GLOBAL-NEXT: cmpl $1, %esi +; GLOBAL-NEXT: movl $1, %ecx +; GLOBAL-NEXT: cmovbel %ecx, %esi +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: xorl %edx, %edx +; GLOBAL-NEXT: divl %esi +; GLOBAL-NEXT: testb $1, %dil +; GLOBAL-NEXT: je .LBB1_2 +; GLOBAL-NEXT: # %bb.1: +; GLOBAL-NEXT: movl %eax, %ecx +; GLOBAL-NEXT: .LBB1_2: # %cond.end.i +; GLOBAL-NEXT: movl %ecx, %eax +; GLOBAL-NEXT: retq +entry: + br i1 %c, label %cond.end.i, label %cond.false.i + +cond.false.i: + br label %cond.end.i + +cond.end.i: + %r = phi i32 [ udiv (i32 ptrtoint (i8* @g1 to i32), i32 ptrtoint (i8* @g2 to i32)), %entry ], [ 1, %cond.false.i ] + ret i32 %r +} + +define i32 @test3(i1 %c) { +; SDAG-LABEL: test3: +; SDAG: # %bb.0: # %entry +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; SDAG-NEXT: movl $g2, %esi +; SDAG-NEXT: movl $g2, %ecx +; SDAG-NEXT: notl %ecx +; SDAG-NEXT: orl %eax, %ecx +; SDAG-NEXT: sete %al +; SDAG-NEXT: testl %esi, %esi +; SDAG-NEXT: sete %cl +; SDAG-NEXT: orb %al, %cl +; SDAG-NEXT: movl $1, %ecx +; SDAG-NEXT: cmovnel %ecx, %esi +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: cltd +; SDAG-NEXT: idivl %esi +; SDAG-NEXT: testb $1, %dil +; SDAG-NEXT: je .LBB2_2 +; SDAG-NEXT: # %bb.1: +; SDAG-NEXT: movl %edx, %ecx +; SDAG-NEXT: .LBB2_2: # %cond.end.i +; SDAG-NEXT: movl %ecx, %eax +; SDAG-NEXT: retq +; +; FAST-LABEL: test3: +; FAST: # %bb.0: # %entry +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; FAST-NEXT: movl $g2, %esi +; FAST-NEXT: movl $g2, %ecx +; FAST-NEXT: notl %ecx +; FAST-NEXT: orl %eax, %ecx +; FAST-NEXT: sete %al +; FAST-NEXT: testl %esi, %esi +; FAST-NEXT: sete %cl +; FAST-NEXT: orb %al, %cl +; FAST-NEXT: movl $1, %ecx +; FAST-NEXT: cmovnel %ecx, %esi +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: cltd +; FAST-NEXT: idivl %esi +; FAST-NEXT: testb $1, %dil +; FAST-NEXT: je .LBB2_2 +; FAST-NEXT: # %bb.1: +; FAST-NEXT: movl %edx, %ecx +; FAST-NEXT: .LBB2_2: # %cond.end.i +; FAST-NEXT: movl %ecx, %eax +; FAST-NEXT: retq +; +; GLOBAL-LABEL: test3: +; GLOBAL: # %bb.0: # %entry +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; GLOBAL-NEXT: movl $g2, %esi +; GLOBAL-NEXT: movl 
$g2, %ecx +; GLOBAL-NEXT: notl %ecx +; GLOBAL-NEXT: orl %eax, %ecx +; GLOBAL-NEXT: sete %al +; GLOBAL-NEXT: testl %esi, %esi +; GLOBAL-NEXT: sete %cl +; GLOBAL-NEXT: orb %al, %cl +; GLOBAL-NEXT: movl $1, %ecx +; GLOBAL-NEXT: cmovnel %ecx, %esi +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: cltd +; GLOBAL-NEXT: idivl %esi +; GLOBAL-NEXT: testb $1, %dil +; GLOBAL-NEXT: je .LBB2_2 +; GLOBAL-NEXT: # %bb.1: +; GLOBAL-NEXT: movl %edx, %ecx +; GLOBAL-NEXT: .LBB2_2: # %cond.end.i +; GLOBAL-NEXT: movl %ecx, %eax +; GLOBAL-NEXT: retq +entry: + br i1 %c, label %cond.end.i, label %cond.false.i + +cond.false.i: + br label %cond.end.i + +cond.end.i: + %r = phi i32 [ srem (i32 ptrtoint (i8* @g1 to i32), i32 ptrtoint (i8* @g2 to i32)), %entry ], [ 1, %cond.false.i ] + ret i32 %r +} + +define i32 @test4(i1 %c) { +; SDAG-LABEL: test4: +; SDAG: # %bb.0: # %entry +; SDAG-NEXT: movl $g2, %esi +; SDAG-NEXT: cmpl $1, %esi +; SDAG-NEXT: movl $1, %ecx +; SDAG-NEXT: cmovbel %ecx, %esi +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: xorl %edx, %edx +; SDAG-NEXT: divl %esi +; SDAG-NEXT: testb $1, %dil +; SDAG-NEXT: je .LBB3_2 +; SDAG-NEXT: # %bb.1: +; SDAG-NEXT: movl %edx, %ecx +; SDAG-NEXT: .LBB3_2: # %cond.end.i +; SDAG-NEXT: movl %ecx, %eax +; SDAG-NEXT: retq +; +; FAST-LABEL: test4: +; FAST: # %bb.0: # %entry +; FAST-NEXT: movl $g2, %esi +; FAST-NEXT: cmpl $1, %esi +; FAST-NEXT: movl $1, %ecx +; FAST-NEXT: cmovbel %ecx, %esi +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: xorl %edx, %edx +; FAST-NEXT: divl %esi +; FAST-NEXT: testb $1, %dil +; FAST-NEXT: je .LBB3_2 +; FAST-NEXT: # %bb.1: +; FAST-NEXT: movl %edx, %ecx +; FAST-NEXT: .LBB3_2: # %cond.end.i +; FAST-NEXT: movl %ecx, %eax +; FAST-NEXT: retq +; +; GLOBAL-LABEL: test4: +; GLOBAL: # %bb.0: # %entry +; GLOBAL-NEXT: movl $g2, %esi +; GLOBAL-NEXT: cmpl $1, %esi +; GLOBAL-NEXT: movl $1, %ecx +; GLOBAL-NEXT: cmovbel %ecx, %esi +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: xorl %edx, %edx +; GLOBAL-NEXT: divl %esi +; GLOBAL-NEXT: testb $1, %dil +; GLOBAL-NEXT: je .LBB3_2 +; GLOBAL-NEXT: # %bb.1: +; GLOBAL-NEXT: movl %edx, %ecx +; GLOBAL-NEXT: .LBB3_2: # %cond.end.i +; GLOBAL-NEXT: movl %ecx, %eax +; GLOBAL-NEXT: retq +entry: + br i1 %c, label %cond.end.i, label %cond.false.i + +cond.false.i: + br label %cond.end.i + +cond.end.i: + %r = phi i32 [ urem (i32 ptrtoint (i8* @g1 to i32), i32 ptrtoint (i8* @g2 to i32)), %entry ], [ 1, %cond.false.i ] + ret i32 %r +} + +define i32 @test5(i32 %c) { +; SDAG-LABEL: test5: +; SDAG: # %bb.0: # %entry +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; SDAG-NEXT: movl $g2, %ecx +; SDAG-NEXT: movl $g2, %edx +; SDAG-NEXT: notl %edx +; SDAG-NEXT: orl %eax, %edx +; SDAG-NEXT: sete %al +; SDAG-NEXT: testl %ecx, %ecx +; SDAG-NEXT: sete %dl +; SDAG-NEXT: orb %al, %dl +; SDAG-NEXT: movl $1, %eax +; SDAG-NEXT: cmovnel %eax, %ecx +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: cltd +; SDAG-NEXT: idivl %ecx +; SDAG-NEXT: #APP +; SDAG-NEXT: #NO_APP +; SDAG-NEXT: .Ltmp0: # Block address taken +; SDAG-NEXT: .LBB4_1: # %cond.false.i +; SDAG-NEXT: movl $1, %eax +; SDAG-NEXT: .LBB4_2: # %cond.end.i +; SDAG-NEXT: retq +; +; FAST-LABEL: test5: +; FAST: # %bb.0: # %entry +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; FAST-NEXT: movl $g2, %ecx +; FAST-NEXT: movl $g2, %edx +; FAST-NEXT: notl %edx +; FAST-NEXT: orl %eax, %edx +; FAST-NEXT: sete %al +; FAST-NEXT: testl %ecx, %ecx +; FAST-NEXT: sete %dl +; FAST-NEXT: orb %al, %dl +; FAST-NEXT: movl $1, %eax +; FAST-NEXT: cmovnel %eax, 
%ecx +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: cltd +; FAST-NEXT: idivl %ecx +; FAST-NEXT: #APP +; FAST-NEXT: #NO_APP +; FAST-NEXT: .Ltmp0: # Block address taken +; FAST-NEXT: .LBB4_1: # %cond.false.i +; FAST-NEXT: movl $1, %eax +; FAST-NEXT: .LBB4_2: # %cond.end.i +; FAST-NEXT: retq +; +; GLOBAL-LABEL: test5: +; GLOBAL: # %bb.0: # %entry +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; GLOBAL-NEXT: movl $g2, %ecx +; GLOBAL-NEXT: movl $g2, %edx +; GLOBAL-NEXT: notl %edx +; GLOBAL-NEXT: orl %eax, %edx +; GLOBAL-NEXT: sete %al +; GLOBAL-NEXT: testl %ecx, %ecx +; GLOBAL-NEXT: sete %dl +; GLOBAL-NEXT: orb %al, %dl +; GLOBAL-NEXT: movl $1, %eax +; GLOBAL-NEXT: cmovnel %eax, %ecx +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: cltd +; GLOBAL-NEXT: idivl %ecx +; GLOBAL-NEXT: #APP +; GLOBAL-NEXT: #NO_APP +; GLOBAL-NEXT: .Ltmp0: # Block address taken +; GLOBAL-NEXT: .LBB4_1: # %cond.false.i +; GLOBAL-NEXT: movl $1, %eax +; GLOBAL-NEXT: .LBB4_2: # %cond.end.i +; GLOBAL-NEXT: retq +entry: + callbr void asm "", "r,X"(i32 %c, i8 *blockaddress(@test5, %cond.false.i)) + to label %cond.false.i [label %cond.end.i] + +cond.false.i: + br label %cond.end.i + +cond.end.i: + %r = phi i32 [ sdiv (i32 ptrtoint (i8* @g1 to i32), i32 ptrtoint (i8* @g2 to i32)), %entry ], [ 1, %cond.false.i ] + ret i32 %r +} Index: test/Transforms/LoopVectorize/X86/masked_load_store.ll =================================================================== --- test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -1502,47 +1502,189 @@ @a = common global [1 x i32*] zeroinitializer, align 8 @c = common global i32* null, align 8 -; The loop here should not be vectorized due to trapping -; constant expression +; Constant expressions never trap; check that we perform the transform +; consistently. 
define void @foo5(i32* nocapture %A, i32* nocapture readnone %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 { ; AVX-LABEL: @foo5( ; AVX-NEXT: entry: +; AVX-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8* +; AVX-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8* +; AVX-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; AVX: vector.memcheck: +; AVX-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A]], i64 10000 +; AVX-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* +; AVX-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000 +; AVX-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* +; AVX-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]] +; AVX-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]] +; AVX-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true +; AVX-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; AVX: vector.ph: +; AVX-NEXT: br label [[VECTOR_BODY:%.*]] +; AVX: vector.body: +; AVX-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AVX-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 +; AVX-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer +; AVX-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], +; AVX-NEXT: [[INDUCTION6:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], +; AVX-NEXT: [[INDUCTION7:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], +; AVX-NEXT: [[INDUCTION8:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], +; AVX-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; AVX-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 +; AVX-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16 +; AVX-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24 +; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]] +; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]] +; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]] +; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]] +; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 +; AVX-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>* +; AVX-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP9]], align 4, !alias.scope !41 +; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 8 +; AVX-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>* +; AVX-NEXT: [[WIDE_LOAD9:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4, !alias.scope !41 +; AVX-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16 +; AVX-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>* +; AVX-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !41 +; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 24 +; AVX-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>* +; AVX-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4, !alias.scope !41 +; AVX-NEXT: [[TMP16:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], +; AVX-NEXT: [[TMP17:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD9]], +; AVX-NEXT: [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD10]], +; AVX-NEXT: [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD11]], +; AVX-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* 
[[A]], i64 [[TMP0]] +; AVX-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] +; AVX-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] +; AVX-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] +; AVX-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0 +; AVX-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <8 x i32>* +; AVX-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> , <8 x i32>* [[TMP25]], i32 4, <8 x i1> [[TMP16]]), !alias.scope !44, !noalias !41 +; AVX-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 8 +; AVX-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <8 x i32>* +; AVX-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> , <8 x i32>* [[TMP27]], i32 4, <8 x i1> [[TMP17]]), !alias.scope !44, !noalias !41 +; AVX-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 16 +; AVX-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <8 x i32>* +; AVX-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> , <8 x i32>* [[TMP29]], i32 4, <8 x i1> [[TMP18]]), !alias.scope !44, !noalias !41 +; AVX-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 24 +; AVX-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <8 x i32>* +; AVX-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> , <8 x i32>* [[TMP31]], i32 4, <8 x i1> [[TMP19]]), !alias.scope !44, !noalias !41 +; AVX-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32 +; AVX-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 +; AVX-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !46 +; AVX: middle.block: +; AVX-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 9984 +; AVX-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; AVX: scalar.ph: +; AVX-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; AVX-NEXT: br label [[FOR_BODY:%.*]] ; AVX: for.body: -; AVX-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[INDVARS_IV]] -; AVX-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; AVX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], 100 +; AVX-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] +; AVX-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; AVX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP33]], 100 ; AVX-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; AVX: if.then: -; AVX-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] +; AVX-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] ; AVX-NEXT: store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 1, i64 0), i32** @c) to i32)), i32* [[ARRAYIDX7]], align 4 ; AVX-NEXT: br label [[FOR_INC]] ; AVX: for.inc: ; AVX-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AVX-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 -; AVX-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; AVX-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !47 ; AVX: for.end: ; AVX-NEXT: ret void ; ; AVX512-LABEL: @foo5( ; AVX512-NEXT: 
entry: +; AVX512-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8* +; AVX512-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8* +; AVX512-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; AVX512: vector.memcheck: +; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A]], i64 10000 +; AVX512-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* +; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000 +; AVX512-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* +; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]] +; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]] +; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX512-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true +; AVX512-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; AVX512: vector.ph: +; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] +; AVX512: vector.body: +; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> undef, i64 [[INDEX]], i32 0 +; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: [[INDUCTION:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], +; AVX512-NEXT: [[INDUCTION6:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], +; AVX512-NEXT: [[INDUCTION7:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], +; AVX512-NEXT: [[INDUCTION8:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], +; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 16 +; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 32 +; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 48 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]] +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]] +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]] +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]] +; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 +; AVX512-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <16 x i32>* +; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP9]], align 4, !alias.scope !51 +; AVX512-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16 +; AVX512-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <16 x i32>* +; AVX512-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i32>, <16 x i32>* [[TMP11]], align 4, !alias.scope !51 +; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 32 +; AVX512-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <16 x i32>* +; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i32>, <16 x i32>* [[TMP13]], align 4, !alias.scope !51 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 48 +; AVX512-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <16 x i32>* +; AVX512-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x i32>, <16 x i32>* [[TMP15]], align 4, !alias.scope !51 +; AVX512-NEXT: [[TMP16:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], +; AVX512-NEXT: [[TMP17:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD9]], +; AVX512-NEXT: [[TMP18:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD10]], +; AVX512-NEXT: [[TMP19:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD11]], +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr 
inbounds i32, i32* [[A]], i64 [[TMP0]] +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] +; AVX512-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] +; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0 +; AVX512-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <16 x i32>* +; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> , <16 x i32>* [[TMP25]], i32 4, <16 x i1> [[TMP16]]), !alias.scope !54, !noalias !51 +; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 16 +; AVX512-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <16 x i32>* +; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> , <16 x i32>* [[TMP27]], i32 4, <16 x i1> [[TMP17]]), !alias.scope !54, !noalias !51 +; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 32 +; AVX512-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <16 x i32>* +; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> , <16 x i32>* [[TMP29]], i32 4, <16 x i1> [[TMP18]]), !alias.scope !54, !noalias !51 +; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 48 +; AVX512-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <16 x i32>* +; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> , <16 x i32>* [[TMP31]], i32 4, <16 x i1> [[TMP19]]), !alias.scope !54, !noalias !51 +; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 64 +; AVX512-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 +; AVX512-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !56 +; AVX512: middle.block: +; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 9984 +; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; AVX512: scalar.ph: +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] ; AVX512: for.body: -; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[INDVARS_IV]] -; AVX512-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], 100 +; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] +; AVX512-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP33]], 100 ; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; AVX512: if.then: -; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] +; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] ; AVX512-NEXT: store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 1, i64 0), i32** @c) to i32)), i32* [[ARRAYIDX7]], align 4 ; AVX512-NEXT: br label [[FOR_INC]] ; AVX512: for.inc: ; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 -; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label 
[[FOR_BODY]] +; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !57 ; AVX512: for.end: ; AVX512-NEXT: ret void ; @@ -1648,22 +1790,22 @@ ; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -3 ; AVX2-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* -; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !41 +; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !48 ; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -4 ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -3 ; AVX2-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>* -; AVX2-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !41 +; AVX2-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !48 ; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD15]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8 ; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -3 ; AVX2-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>* -; AVX2-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !41 +; AVX2-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !48 ; AVX2-NEXT: [[REVERSE18:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD17]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -12 ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -3 ; AVX2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* -; AVX2-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4, !alias.scope !41 +; AVX2-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4, !alias.scope !48 ; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD19]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer ; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt <4 x i32> [[REVERSE16]], zeroinitializer @@ -1677,25 +1819,25 @@ ; AVX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -3 ; AVX2-NEXT: [[REVERSE21:%.*]] = shufflevector <4 x i1> [[TMP20]], <4 x i1> undef, <4 x i32> ; AVX2-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>* -; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE21]], <4 x double> undef), !alias.scope !44 +; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE21]], <4 x double> undef), !alias.scope !51 ; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> undef, <4 x i32> ; AVX2-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -4 ; AVX2-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -3 ; AVX2-NEXT: [[REVERSE23:%.*]] = shufflevector <4 x i1> [[TMP21]], <4 x i1> undef, <4 x i32> ; AVX2-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>* -; AVX2-NEXT: [[WIDE_MASKED_LOAD24:%.*]] 
= call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> undef), !alias.scope !51
; AVX2-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD24]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
; AVX2-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -3
; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP22]], <4 x i1> undef, <4 x i32>
; AVX2-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !51
; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD27]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -12
; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -3
; AVX2-NEXT: [[REVERSE29:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> undef, <4 x i32>
; AVX2-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE29]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE29]], <4 x double> undef), !alias.scope !51
; AVX2-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD30]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP40:%.*]] = fadd <4 x double> [[REVERSE22]],
; AVX2-NEXT: [[TMP41:%.*]] = fadd <4 x double> [[REVERSE25]],
@@ -1709,25 +1851,25 @@
; AVX2-NEXT: [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
; AVX2-NEXT: [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -3
; AVX2-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <4 x double>*
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE32]], <4 x double>* [[TMP50]], i32 8, <4 x i1> [[REVERSE21]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE32]], <4 x double>* [[TMP50]], i32 8, <4 x i1> [[REVERSE21]]), !alias.scope !53, !noalias !55
; AVX2-NEXT: [[REVERSE34:%.*]] = shufflevector <4 x double> [[TMP41]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -4
; AVX2-NEXT: [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -3
; AVX2-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <4 x double>*
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE34]], <4 x double>* [[TMP53]], i32 8, <4 x i1> [[REVERSE23]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE34]], <4 x double>* [[TMP53]], i32 8, <4 x i1> [[REVERSE23]]), !alias.scope !53, !noalias !55
; AVX2-NEXT: [[REVERSE36:%.*]] = shufflevector <4 x double> [[TMP42]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8
; AVX2-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -3
; AVX2-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <4 x double>*
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE36]], <4 x double>* [[TMP56]], i32 8, <4 x i1> [[REVERSE26]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE36]], <4 x double>* [[TMP56]], i32 8, <4 x i1> [[REVERSE26]]), !alias.scope !53, !noalias !55
; AVX2-NEXT: [[REVERSE38:%.*]] = shufflevector <4 x double> [[TMP43]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -12
; AVX2-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -3
; AVX2-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE38]], <4 x double>* [[TMP59]], i32 8, <4 x i1> [[REVERSE29]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE38]], <4 x double>* [[TMP59]], i32 8, <4 x i1> [[REVERSE29]]), !alias.scope !53, !noalias !55
; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX2-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
-; AVX2-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !49
+; AVX2-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !56
; AVX2: middle.block:
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1750,7 +1892,7 @@
; AVX2: for.inc:
; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; AVX2-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
-; AVX2-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !50
+; AVX2-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !57
; AVX2: for.end:
; AVX2-NEXT: ret void
;
@@ -1798,22 +1940,22 @@
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -7
; AVX512-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
-; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, !alias.scope !51
+; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, !alias.scope !58
; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> undef, <8 x i32>
; AVX512-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8
; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -7
; AVX512-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
-; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !51
+; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !58
; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD15]], <8 x i32> undef, <8 x i32>
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -16
; AVX512-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -7
; AVX512-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <8 x i32>*
-; AVX512-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i32>, <8 x i32>* [[TMP16]], align 4, !alias.scope !51
+; AVX512-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i32>, <8 x i32>* [[TMP16]], align 4, !alias.scope !58
; AVX512-NEXT: [[REVERSE18:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD17]], <8 x i32> undef, <8 x i32>
; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -24
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -7
; AVX512-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <8 x i32>*
-; AVX512-NEXT: [[WIDE_LOAD19:%.*]] = load <8 x i32>, <8 x i32>* [[TMP19]], align 4, !alias.scope !51
+; AVX512-NEXT: [[WIDE_LOAD19:%.*]] = load <8 x i32>, <8 x i32>* [[TMP19]], align 4, !alias.scope !58
; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD19]], <8 x i32> undef, <8 x i32>
; AVX512-NEXT: [[TMP20:%.*]] = icmp sgt <8 x i32> [[REVERSE]], zeroinitializer
; AVX512-NEXT: [[TMP21:%.*]] = icmp sgt <8 x i32> [[REVERSE16]], zeroinitializer
@@ -1827,25 +1969,25 @@
; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -7
; AVX512-NEXT: [[REVERSE21:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> undef, <8 x i32>
; AVX512-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP30]], i32 8, <8 x i1> [[REVERSE21]], <8 x double> undef), !alias.scope !54
+; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP30]], i32 8, <8 x i1> [[REVERSE21]], <8 x double> undef), !alias.scope !61
; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -7
; AVX512-NEXT: [[REVERSE23:%.*]] = shufflevector <8 x i1> [[TMP21]], <8 x i1> undef, <8 x i32>
; AVX512-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP33]], i32 8, <8 x i1> [[REVERSE23]], <8 x double> undef), !alias.scope !54
+; AVX512-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP33]], i32 8, <8 x i1> [[REVERSE23]], <8 x double> undef), !alias.scope !61
; AVX512-NEXT: [[REVERSE25:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD24]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -16
; AVX512-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -7
; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x i1> [[TMP22]], <8 x i1> undef, <8 x i32>
; AVX512-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP36]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> undef), !alias.scope !54
+; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP36]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> undef), !alias.scope !61
; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD27]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -24
; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -7
; AVX512-NEXT: [[REVERSE29:%.*]] = shufflevector <8 x i1> [[TMP23]], <8 x i1> undef, <8 x i32>
; AVX512-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP39]], i32 8, <8 x i1> [[REVERSE29]], <8 x double> undef), !alias.scope !54
+; AVX512-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP39]], i32 8, <8 x i1> [[REVERSE29]], <8 x double> undef), !alias.scope !61
; AVX512-NEXT: [[REVERSE31:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD30]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP40:%.*]] = fadd <8 x double> [[REVERSE22]],
; AVX512-NEXT: [[TMP41:%.*]] = fadd <8 x double> [[REVERSE25]],
@@ -1859,25 +2001,25 @@
; AVX512-NEXT: [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
; AVX512-NEXT: [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -7
; AVX512-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <8 x double>*
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE32]], <8 x double>* [[TMP50]], i32 8, <8 x i1> [[REVERSE21]]), !alias.scope !56, !noalias !58
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE32]], <8 x double>* [[TMP50]], i32 8, <8 x i1> [[REVERSE21]]), !alias.scope !63, !noalias !65
; AVX512-NEXT: [[REVERSE34:%.*]] = shufflevector <8 x double> [[TMP41]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8
; AVX512-NEXT: [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -7
; AVX512-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <8 x double>*
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE34]], <8 x double>* [[TMP53]], i32 8, <8 x i1> [[REVERSE23]]), !alias.scope !56, !noalias !58
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE34]], <8 x double>* [[TMP53]], i32 8, <8 x i1> [[REVERSE23]]), !alias.scope !63, !noalias !65
; AVX512-NEXT: [[REVERSE36:%.*]] = shufflevector <8 x double> [[TMP42]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -16
; AVX512-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -7
; AVX512-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <8 x double>*
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE36]], <8 x double>* [[TMP56]], i32 8, <8 x i1> [[REVERSE26]]), !alias.scope !56, !noalias !58
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE36]], <8 x double>* [[TMP56]], i32 8, <8 x i1> [[REVERSE26]]), !alias.scope !63, !noalias !65
; AVX512-NEXT: [[REVERSE38:%.*]] = shufflevector <8 x double> [[TMP43]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -24
; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -7
; AVX512-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <8 x double>*
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE38]], <8 x double>* [[TMP59]], i32 8, <8 x i1> [[REVERSE29]]), !alias.scope !56, !noalias !58
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE38]], <8 x double>* [[TMP59]], i32 8, <8 x i1> [[REVERSE29]]), !alias.scope !63, !noalias !65
; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
-; AVX512-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !59
+; AVX512-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !66
; AVX512: middle.block:
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1900,7 +2042,7 @@
; AVX512: for.inc:
; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; AVX512-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
-; AVX512-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !60
+; AVX512-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !67
; AVX512: for.end:
; AVX512-NEXT: ret void
;
@@ -2038,7 +2180,7 @@
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX1-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !41
+; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !48
; AVX1: middle.block:
; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2064,7 +2206,7 @@
; AVX1: for.inc:
; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !42
+; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !49
; AVX1: for.end.loopexit:
; AVX1-NEXT: br label [[FOR_END]]
; AVX1: for.end:
@@ -2168,7 +2310,7 @@
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX2-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !51
+; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !58
; AVX2: middle.block:
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2194,7 +2336,7 @@
; AVX2: for.inc:
; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !52
+; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !59
; AVX2: for.end.loopexit:
; AVX2-NEXT: br label [[FOR_END]]
; AVX2: for.end:
@@ -2298,7 +2440,7 @@
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> , <8 x double>* [[TMP63]], i32 8, <8 x i1> [[TMP55]])
; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !61
+; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !68
; AVX512: middle.block:
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2324,7 +2466,7 @@
; AVX512: for.inc:
; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !62
+; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !69
; AVX512: for.end.loopexit:
; AVX512-NEXT: br label [[FOR_END]]
; AVX512: for.end:
@@ -2473,7 +2615,7 @@
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX1-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !44
+; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !51
; AVX1: middle.block:
; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2499,7 +2641,7 @@
; AVX1: for.inc:
; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !45
+; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !52
; AVX1: for.end.loopexit:
; AVX1-NEXT: br label [[FOR_END]]
; AVX1: for.end:
@@ -2603,7 +2745,7 @@
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX2-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !54
+; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !61
; AVX2: middle.block:
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2629,7 +2771,7 @@
; AVX2: for.inc:
; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !55
+; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !62
; AVX2: for.end.loopexit:
; AVX2-NEXT: br label [[FOR_END]]
; AVX2: for.end:
@@ -2733,7 +2875,7 @@
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> , <8 x double>* [[TMP63]], i32 8, <8 x i1> [[TMP55]])
; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !64
+; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !71
; AVX512: middle.block:
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2759,7 +2901,7 @@
; AVX512: for.inc:
; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !65
+; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !72
; AVX512: for.end.loopexit:
; AVX512-NEXT: br label [[FOR_END]]
; AVX512: for.end:
Index: test/Transforms/LoopVectorize/if-conversion.ll
===================================================================
--- test/Transforms/LoopVectorize/if-conversion.ll
+++ test/Transforms/LoopVectorize/if-conversion.ll
@@ -108,12 +108,12 @@
@a = common global [1 x i32*] zeroinitializer, align 8
@c = common global i32* null, align 8
-; We use to if convert this loop. This is not safe because there is a trapping
-; constant expression.
+; Constant expressions never trap; check that we perform the transform
+; consistently.
; PR16729
; CHECK-LABEL: trapping_constant_expression
-; CHECK-NOT: or <4 x i32>
+; CHECK: or <4 x i32>
define i32 @trapping_constant_expression() {
entry:
@@ -122,13 +122,13 @@
for.body:
%inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
%or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ]
- br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end
+ br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 0), i32** getelementptr inbounds (i32*, i32** @c, i64 1)), label %cond.false, label %cond.end
cond.false:
br label %cond.end
cond.end:
- %cond = phi i32 [ sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32)), %cond.false ], [ 0, %for.body ]
+ %cond = phi i32 [ sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 0), i32** getelementptr inbounds (i32*, i32** @c, i64 1)) to i32)), %cond.false ], [ 0, %for.body ]
%or = or i32 %or2, %cond
%inc = add nsw i32 %inc3, 1
%cmp = icmp slt i32 %inc, 128
@@ -138,12 +138,11 @@
ret i32 %or
}
-; Neither should we if-convert if there is an instruction operand that is a
-; trapping constant expression.
+; Constant expressions never trap; check that we perform the transform consistently.
; PR16729
; CHECK-LABEL: trapping_constant_expression2
-; CHECK-NOT: or <4 x i32>
+; CHECK: or <4 x i32>
define i32 @trapping_constant_expression2() {
entry:
Index: test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
===================================================================
--- test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
+++ test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
@@ -1,29 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; PR957
; RUN: opt < %s -simplifycfg -S | FileCheck %s
-; CHECK-NOT: select
-
@G = extern_weak global i32
define i32 @test(i32 %tmp) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: cond_false179:
+; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i32 [[TMP:%.*]], 0
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP181]], i32 udiv (i32 1, i32 ptrtoint (i32* @G to i32)), i32 [[TMP]]
+; CHECK-NEXT: ret i32 [[SPEC_SELECT]]
+;
cond_false179:
- %tmp181 = icmp eq i32 %tmp, 0 ; [#uses=1]
- br i1 %tmp181, label %cond_true182, label %cond_next185
-cond_true182: ; preds = %cond_false179
- br label %cond_next185
-cond_next185: ; preds = %cond_true182, %cond_false179
- %d0.3 = phi i32 [ udiv (i32 1, i32 ptrtoint (i32* @G to i32)), %cond_true182 ], [ %tmp, %cond_false179 ] ; [#uses=1]
- ret i32 %d0.3
+ %tmp181 = icmp eq i32 %tmp, 0
+ br i1 %tmp181, label %cond_true182, label %cond_next185
+cond_true182:
+ br label %cond_next185
+cond_next185:
+ %d0.3 = phi i32 [ udiv (i32 1, i32 ptrtoint (i32* @G to i32)), %cond_true182 ], [ %tmp, %cond_false179 ]
+ ret i32 %d0.3
}
define i32 @test2(i32 %tmp) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: cond_false179:
+; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i32 [[TMP:%.*]], 0
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP181]], i32 udiv (i32 1, i32 ptrtoint (i32* @G to i32)), i32 [[TMP]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @test(i32 4)
+; CHECK-NEXT: ret i32 [[SPEC_SELECT]]
+;
cond_false179:
- %tmp181 = icmp eq i32 %tmp, 0 ; [#uses=1]
- br i1 %tmp181, label %cond_true182, label %cond_next185
+ %tmp181 = icmp eq i32 %tmp, 0
+ br i1 %tmp181, label %cond_true182, label %cond_next185
cond_true182: ; preds = %cond_false179
- br label %cond_next185
-cond_next185: ; preds = %cond_true182, %cond_false179
- %d0.3 = phi i32 [ udiv (i32 1, i32 ptrtoint (i32* @G to i32)), %cond_true182 ], [ %tmp, %cond_false179 ] ; [#uses=1]
- call i32 @test( i32 4 ) ; :0 [#uses=0]
- ret i32 %d0.3
+ br label %cond_next185
+cond_next185:
+ %d0.3 = phi i32 [ udiv (i32 1, i32 ptrtoint (i32* @G to i32)), %cond_true182 ], [ %tmp, %cond_false179 ]
+ call i32 @test( i32 4 )
+ ret i32 %d0.3
}
Index: test/Transforms/SimplifyCFG/ConditionalTrappingConstantExpr.ll
===================================================================
--- test/Transforms/SimplifyCFG/ConditionalTrappingConstantExpr.ll
+++ test/Transforms/SimplifyCFG/ConditionalTrappingConstantExpr.ll
@@ -4,18 +4,15 @@
@G = extern_weak global i32
; PR3354
-; Do not merge bb1 into the entry block, it might trap.
+; Constant expressions never trap; check that we perform the transform consistently.
define i32 @admiral(i32 %a, i32 %b) {
; CHECK-LABEL: @admiral(
-; CHECK-NEXT: [[C:%.*]] = icmp sle i32 %a, %b
-; CHECK-NEXT: br i1 [[C]], label %bb2, label %bb1
-; CHECK: bb1:
+; CHECK-NEXT: bb2:
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[D:%.*]] = icmp sgt i32 sdiv (i32 -32768, i32 ptrtoint (i32* @G to i32)), 0
-; CHECK-NEXT: [[DOT:%.*]] = select i1 [[D]], i32 927, i32 42
-; CHECK-NEXT: br label %bb2
-; CHECK: bb2:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 42, %0 ], [ [[DOT]], %bb1 ]
+; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[C]], [[D]]
+; CHECK-NEXT: [[MERGE:%.*]] = select i1 [[OR_COND]], i32 927, i32 42
; CHECK-NEXT: ret i32 [[MERGE]]
;
%c = icmp sle i32 %a, %b
@@ -31,12 +28,9 @@
define i32 @ackbar(i1 %c) {
; CHECK-LABEL: @ackbar(
-; CHECK-NEXT: br i1 %c, label %bb5, label %bb6
-; CHECK: bb5:
-; CHECK-NEXT: [[DOT:%.*]] = select i1 icmp sgt (i32 sdiv (i32 32767, i32 ptrtoint (i32* @G to i32)), i32 0), i32 42, i32 927
-; CHECK-NEXT: br label %bb6
-; CHECK: bb6:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 42, %0 ], [ [[DOT]], %bb5 ]
+; CHECK-NEXT: bb6:
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 icmp sgt (i32 sdiv (i32 32767, i32 ptrtoint (i32* @G to i32)), i32 0), i32 42, i32 927
+; CHECK-NEXT: [[MERGE:%.*]] = select i1 [[C:%.*]], i32 [[SPEC_SELECT]], i32 42
; CHECK-NEXT: ret i32 [[MERGE]]
;
br i1 %c, label %bb5, label %bb6
@@ -53,8 +47,8 @@
define i32 @tarp(i1 %c) {
; CHECK-LABEL: @tarp(
; CHECK-NEXT: bb9:
-; CHECK-NEXT: [[DOT:%.*]] = select i1 fcmp oeq (float fdiv (float 3.000000e+00, float sitofp (i32 ptrtoint (i32* @G to i32) to float)), float 1.000000e+00), i32 42, i32 927
-; CHECK-NEXT: [[MERGE:%.*]] = select i1 %c, i32 [[DOT]], i32 42
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 fcmp oeq (float fdiv (float 3.000000e+00, float sitofp (i32 ptrtoint (i32* @G to i32) to float)), float 1.000000e+00), i32 42, i32 927
+; CHECK-NEXT: [[MERGE:%.*]] = select i1 [[C:%.*]], i32 [[SPEC_SELECT]], i32 42
; CHECK-NEXT: ret i32 [[MERGE]]
;
br i1 %c, label %bb8, label %bb9
Index: test/Transforms/SimplifyCFG/PR16069.ll
===================================================================
--- test/Transforms/SimplifyCFG/PR16069.ll
+++ test/Transforms/SimplifyCFG/PR16069.ll
@@ -5,7 +5,7 @@
define i32 @foo(i1 %y) {
; CHECK-LABEL: @foo(
-; CHECK: [[COND_I:%.*]] = phi i32 [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb2 ], [ 0, %0 ]
+; CHECK: [[COND_I:%.*]] = select i1 %y, i32 0, i32 srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32))
; CHECK-NEXT: ret i32 [[COND_I]]
;
br i1 %y, label %bb1, label %bb2
@@ -20,7 +20,7 @@
define i32 @foo2(i1 %x) {
; CHECK-LABEL: @foo2(
-; CHECK: [[COND:%.*]] = phi i32 [ 0, %bb1 ], [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb0 ]
+; CHECK: [[COND:%.*]] = select i1 %x, i32 0, i32 srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32))
; CHECK-NEXT: ret i32 [[COND]]
;
bb0:
Index: test/Transforms/SimplifyCFG/PR17073.ll
===================================================================
--- test/Transforms/SimplifyCFG/PR17073.ll
+++ test/Transforms/SimplifyCFG/PR17073.ll
@@ -1,11 +1,6 @@
; RUN: opt < %s -simplifycfg -S | FileCheck %s
-; In PR17073 ( http://llvm.org/pr17073 ), we illegally hoisted an operation that can trap.
-; The first test confirms that we don't do that when the trapping op is reached by the current BB (block1).
-; The second test confirms that we don't do that when the trapping op is reached by the previous BB (entry).
-; The third test confirms that we can still do this optimization for an operation (add) that doesn't trap.
-; The tests must be complicated enough to prevent previous SimplifyCFG actions from optimizing away
-; the instructions that we're checking for.
+; Constant expressions never trap; check that we perform the transform consistently.
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
target triple = "i386-apple-macosx10.9.0"
@@ -14,8 +9,7 @@
@b = common global i8 0, align 1
; CHECK-LABEL: can_trap1
-; CHECK-NOT: or i1 %tobool, icmp eq (i32* bitcast (i8* @b to i32*), i32* @a)
-; CHECK-NOT: select i1 %tobool, i32* null, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a)
+; CHECK: select i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a), i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), i32* null
define i32* @can_trap1() {
entry:
%0 = load i32, i32* @a, align 4
@@ -34,8 +28,7 @@
}
; CHECK-LABEL: can_trap2
-; CHECK-NOT: or i1 %tobool, icmp eq (i32* bitcast (i8* @b to i32*), i32* @a)
-; CHECK-NOT: select i1 %tobool, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), i32* null
+; CHECK: select i1 %tobool, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), i32* null
define i32* @can_trap2() {
entry:
%0 = load i32, i32* @a, align 4
Index: unittests/IR/ConstantsTest.cpp
===================================================================
--- unittests/IR/ConstantsTest.cpp
+++ unittests/IR/ConstantsTest.cpp
@@ -241,11 +241,23 @@
CHECK(ConstantExpr::getFSub(P1, P1), "fsub float " P1STR ", " P1STR);
CHECK(ConstantExpr::getMul(P0, P0), "mul i32 " P0STR ", " P0STR);
CHECK(ConstantExpr::getFMul(P1, P1), "fmul float " P1STR ", " P1STR);
- CHECK(ConstantExpr::getUDiv(P0, P0), "udiv i32 " P0STR ", " P0STR);
- CHECK(ConstantExpr::getSDiv(P0, P0), "sdiv i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getUDiv(P0, P0),
+ "udiv i32 " P0STR ", select (i1 icmp eq (i32 " P0STR
+ ", i32 0), i32 1, i32 " P0STR ")");
+ CHECK(ConstantExpr::getSDiv(P4, P0),
+ "sdiv i32 " P4STR
+ ", select (i1 or (i1 and (i1 icmp eq (i32 " P0STR
+ ", i32 -1), i1 icmp eq (i32 " P4STR
+ ", i32 -2147483648)), i1 icmp eq (i32 " P0STR ", i32 0)), i32 1, i32 "
+ P0STR ")");
CHECK(ConstantExpr::getFDiv(P1, P1), "fdiv float " P1STR ", " P1STR);
- CHECK(ConstantExpr::getURem(P0, P0), "urem i32 " P0STR ", " P0STR);
- CHECK(ConstantExpr::getSRem(P0, P0), "srem i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getURem(P0, P0), "urem i32 " P0STR ", select (i1 icmp eq (i32 " P0STR ", i32 0), i32 1, i32 " P0STR ")");
+ CHECK(ConstantExpr::getSRem(P4, P0),
+ "srem i32 " P4STR
+ ", select (i1 or (i1 and (i1 icmp eq (i32 " P0STR
+ ", i32 -1), i1 icmp eq (i32 " P4STR
+ ", i32 -2147483648)), i1 icmp eq (i32 " P0STR ", i32 0)), i32 1, i32 "
+ P0STR ")");
CHECK(ConstantExpr::getFRem(P1, P1), "frem float " P1STR ", " P1STR);
CHECK(ConstantExpr::getAnd(P0, P0), "and i32 " P0STR ", " P0STR);
CHECK(ConstantExpr::getOr(P0, P0), "or i32 " P0STR ", " P0STR);
@@ -266,7 +278,9 @@
CHECK(ConstantExpr::getFPExtend(P1, DoubleTy),
"fpext float " P1STR " to double");
- CHECK(ConstantExpr::getExactUDiv(P0, P0), "udiv exact i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getExactUDiv(P0, P0),
"udiv exact i32 " P0STR ", select (i1 icmp eq (i32 " P0STR + ", i32 0), i32 1, i32 " P0STR ")"); CHECK(ConstantExpr::getSelect(P3, P0, P4), "select i1 " P3STR ", i32 " P0STR ", i32 " P4STR);