Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -342,6 +342,8 @@ : OpValue(OpValue), MainOp(MainOp), AltOp(AltOp) {} }; +enum BinOpFlags {NoFlag, NUW, NSW, NUWNSW}; + } // end anonymous namespace /// Chooses the correct key for scheduling data. If \p Op has the same (or @@ -354,11 +356,25 @@ return S.OpValue; } + +static BinOpFlags getBinOpFlags(BinaryOperator *I) { + BinOpFlags Ret = NoFlag; + if (!isa<OverflowingBinaryOperator>(I)) + return Ret; + if (I->hasNoUnsignedWrap()) + Ret = NUW; + if (I->hasNoSignedWrap()) + return ((Ret == NUW) ? Ret = NUWNSW : NSW); + return Ret; +} + /// \returns analysis of the Instructions in \p VL described in /// InstructionsState, the Opcode that we suppose the whole list /// could be vectorized even if its structure is diverse. static InstructionsState getSameOpcode(ArrayRef<Value *> VL, unsigned BaseIndex = 0) { + BinOpFlags MainOpFlags, AltOpFlags; + // Make sure these are all Instructions. if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); })) return InstructionsState(VL[BaseIndex], nullptr, nullptr); @@ -369,16 +385,30 @@ unsigned AltOpcode = Opcode; unsigned AltIndex = BaseIndex; + if (IsBinOp) + MainOpFlags = getBinOpFlags(cast<BinaryOperator>(VL[BaseIndex])); + // Check for one alternate opcode from another BinaryOperator. // TODO - generalize to support all operators (types, calls etc.). 
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) { unsigned InstOpcode = cast<Instruction>(VL[Cnt])->getOpcode(); - if (IsBinOp && isa<BinaryOperator>(VL[Cnt])) { - if (InstOpcode == Opcode || InstOpcode == AltOpcode) + BinaryOperator *BOP = dyn_cast<BinaryOperator>(VL[Cnt]); + if (IsBinOp && BOP) { + BinOpFlags OpFlags = getBinOpFlags(BOP); + if (InstOpcode == Opcode) { + if (OpFlags != MainOpFlags) + return InstructionsState(VL[BaseIndex], nullptr, nullptr); + continue; + } + if (InstOpcode == AltOpcode) { + if (OpFlags != AltOpFlags) + return InstructionsState(VL[BaseIndex], nullptr, nullptr); continue; + } if (Opcode == AltOpcode) { AltOpcode = InstOpcode; AltIndex = Cnt; + AltOpFlags = OpFlags; continue; } } else if (IsCastOp && isa<CastInst>(VL[Cnt])) { Index: test/Transforms/SLPVectorizer/AArch64/getelementptr.ll =================================================================== --- test/Transforms/SLPVectorizer/AArch64/getelementptr.ll +++ test/Transforms/SLPVectorizer/AArch64/getelementptr.ll @@ -29,9 +29,9 @@ ; YAML-NEXT: Function: getelementptr_4x32 ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '11' +; YAML-NEXT: - Cost: '6' ; YAML-NEXT: - String: ' and with tree size ' -; YAML-NEXT: - TreeSize: '5' +; YAML-NEXT: - TreeSize: '3' ; YAML: --- !Passed ; YAML-NEXT: Pass: slp-vectorizer @@ -54,44 +54,42 @@ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[Z:%.*]], i32 3 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP21:%.*]], i32 1 ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD16:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; CHECK: for.body: -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ zeroinitializer, 
[[FOR_BODY_PREHEADER]] ], [ [[TMP21]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 -; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP5]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> undef, i32 [[T4]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 +; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> undef, i32 [[T4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP9]] ; CHECK-NEXT: [[T6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1 -; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP7]], i32 1 +; CHECK-NEXT: 
[[TMP12:%.*]] = sext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP12]] ; CHECK-NEXT: [[T8:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2 -; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2 +; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP14]] ; CHECK-NEXT: [[T10:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4 ; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP16]] ; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[ADD11]], i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> , i32 [[T12]], i32 1 -; CHECK-NEXT: [[TMP21]] = add nsw <2 x i32> [[TMP19]], [[TMP20]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP22]], [[N]] +; CHECK-NEXT: [[ADD16]] = add nsw i32 [[ADD11]], [[T12]] +; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i32 [[TMP4]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT: [[TMP17:%.*]] 
= insertelement <2 x i32> undef, i32 [[INDVARS_IV_NEXT]], i32 0 +; CHECK-NEXT: [[TMP18]] = insertelement <2 x i32> [[TMP17]], i32 [[ADD16]], i32 1 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; entry: @@ -133,16 +131,6 @@ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body } -; YAML: --- !Passed -; YAML-NEXT: Pass: slp-vectorizer -; YAML-NEXT: Name: VectorizedList -; YAML-NEXT: Function: getelementptr_2x32 -; YAML-NEXT: Args: -; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '11' -; YAML-NEXT: - String: ' and with tree size ' -; YAML-NEXT: - TreeSize: '5' - ; YAML: --- !Passed ; YAML-NEXT: Pass: slp-vectorizer ; YAML-NEXT: Name: VectorizedList @@ -163,43 +151,41 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP18:%.*]], i32 1 ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD16:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; CHECK: for.body: -; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP18]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 -; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[T4]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 +; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP3]], 1 +; CHECK-NEXT: 
[[TMP4:%.*]] = sext i32 [[T4]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[T6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 -; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP6]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP5]] ; CHECK-NEXT: [[T7:%.*]] = or i32 [[T4]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[T7]] to i64 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[T7]] to i64 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP6]] ; CHECK-NEXT: [[T8:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[T4]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP9]], [[TMP1]] -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[T4]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i32> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP11]] ; CHECK-NEXT: [[T10:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4 ; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]] -; CHECK-NEXT: 
[[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP13]] ; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[ADD11]], i32 1 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> , i32 [[T12]], i32 1 -; CHECK-NEXT: [[TMP18]] = add nsw <2 x i32> [[TMP16]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP18]], i32 0 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP19]], [[N]] +; CHECK-NEXT: [[ADD16]] = add nsw i32 [[ADD11]], [[T12]] +; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i32 [[TMP3]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> undef, i32 [[INDVARS_IV_NEXT]], i32 0 +; CHECK-NEXT: [[TMP15]] = insertelement <2 x i32> [[TMP14]], i32 [[ADD16]], i32 1 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; entry: Index: test/Transforms/SLPVectorizer/X86/pr35497.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/pr35497.ll +++ test/Transforms/SLPVectorizer/X86/pr35497.ll @@ -58,19 +58,20 @@ ; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 4 ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[AND_4:%.*]] = shl i64 [[ADD]], 2 ; CHECK-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x 
i64], [0 x i64]* undef, i64 0, i64 1 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> undef, i64 [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[ADD]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i64> [[TMP8]], +; CHECK-NEXT: [[AND_5:%.*]] = shl nuw nsw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> undef, i64 [[AND_5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[AND_4]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = and <2 x i64> [[TMP7]], ; CHECK-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[ARRAYIDX2_6]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP10]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* [[TMP13]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[ARRAYIDX2_6]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = lshr <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw <2 x i64> [[TMP8]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* [[TMP12]], align 1 ; CHECK-NEXT: ret void ; entry: Index: test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll +++ test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll @@ -122,11 +122,18 @@ ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 ; CHECK-NEXT: 
[[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load i32, i32* [[IDX1]], align 4 +; CHECK-NEXT: [[LOAD2:%.*]] = load i32, i32* [[IDX2]], align 4 +; CHECK-NEXT: [[LOAD3:%.*]] = load i32, i32* [[IDX3]], align 4 +; CHECK-NEXT: [[LOAD4:%.*]] = load i32, i32* [[IDX4]], align 4 +; CHECK-NEXT: [[OP1:%.*]] = add nsw i32 [[LOAD1]], 1 +; CHECK-NEXT: [[OP2:%.*]] = add nsw i32 [[LOAD2]], 1 +; CHECK-NEXT: [[OP3:%.*]] = add nsw i32 [[LOAD3]], 1 +; CHECK-NEXT: [[OP4:%.*]] = add i32 [[LOAD4]], 1 +; CHECK-NEXT: store i32 [[OP1]], i32* [[IDX1]], align 4 +; CHECK-NEXT: store i32 [[OP2]], i32* [[IDX2]], align 4 +; CHECK-NEXT: store i32 [[OP3]], i32* [[IDX3]], align 4 +; CHECK-NEXT: store i32 [[OP4]], i32* [[IDX4]], align 4 ; CHECK-NEXT: ret void ; %idx1 = getelementptr inbounds i32, i32* %x, i64 0 @@ -194,11 +201,18 @@ ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load i32, i32* [[IDX1]], align 4 +; CHECK-NEXT: [[LOAD2:%.*]] = load i32, i32* [[IDX2]], align 4 +; CHECK-NEXT: [[LOAD3:%.*]] = load i32, i32* 
[[IDX3]], align 4 +; CHECK-NEXT: [[LOAD4:%.*]] = load i32, i32* [[IDX4]], align 4 +; CHECK-NEXT: [[OP1:%.*]] = add nuw i32 [[LOAD1]], 1 +; CHECK-NEXT: [[OP2:%.*]] = add i32 [[LOAD2]], 1 +; CHECK-NEXT: [[OP3:%.*]] = add i32 [[LOAD3]], 1 +; CHECK-NEXT: [[OP4:%.*]] = add nuw i32 [[LOAD4]], 1 +; CHECK-NEXT: store i32 [[OP1]], i32* [[IDX1]], align 4 +; CHECK-NEXT: store i32 [[OP2]], i32* [[IDX2]], align 4 +; CHECK-NEXT: store i32 [[OP3]], i32* [[IDX3]], align 4 +; CHECK-NEXT: store i32 [[OP4]], i32* [[IDX4]], align 4 ; CHECK-NEXT: ret void ; %idx1 = getelementptr inbounds i32, i32* %x, i64 0 @@ -412,13 +426,18 @@ ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load i32, i32* [[IDX1]], align 4 +; CHECK-NEXT: [[LOAD2:%.*]] = load i32, i32* [[IDX2]], align 4 +; CHECK-NEXT: [[LOAD3:%.*]] = load i32, i32* [[IDX3]], align 4 +; CHECK-NEXT: [[LOAD4:%.*]] = load i32, i32* [[IDX4]], align 4 +; CHECK-NEXT: [[OP1:%.*]] = add nsw i32 [[LOAD1]], 1 +; CHECK-NEXT: [[OP2:%.*]] = sub nsw i32 [[LOAD2]], 1 +; CHECK-NEXT: [[OP3:%.*]] = add nsw i32 [[LOAD3]], 1 +; CHECK-NEXT: [[OP4:%.*]] = sub i32 [[LOAD4]], 1 +; CHECK-NEXT: store i32 [[OP1]], i32* [[IDX1]], align 4 +; CHECK-NEXT: store i32 [[OP2]], i32* [[IDX2]], align 4 +; CHECK-NEXT: store i32 [[OP3]], i32* [[IDX3]], align 4 +; CHECK-NEXT: store i32 [[OP4]], 
i32* [[IDX4]], align 4 ; CHECK-NEXT: ret void ; %idx1 = getelementptr inbounds i32, i32* %x, i64 0 @@ -450,13 +469,18 @@ ; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1 ; CHECK-NEXT: [[IDX3:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load i32, i32* [[IDX1]], align 4 +; CHECK-NEXT: [[LOAD2:%.*]] = load i32, i32* [[IDX2]], align 4 +; CHECK-NEXT: [[LOAD3:%.*]] = load i32, i32* [[IDX3]], align 4 +; CHECK-NEXT: [[LOAD4:%.*]] = load i32, i32* [[IDX4]], align 4 +; CHECK-NEXT: [[OP1:%.*]] = add i32 [[LOAD1]], 1 +; CHECK-NEXT: [[OP2:%.*]] = sub nsw i32 [[LOAD2]], 1 +; CHECK-NEXT: [[OP3:%.*]] = add nsw i32 [[LOAD3]], 1 +; CHECK-NEXT: [[OP4:%.*]] = sub i32 [[LOAD4]], 1 +; CHECK-NEXT: store i32 [[OP1]], i32* [[IDX1]], align 4 +; CHECK-NEXT: store i32 [[OP2]], i32* [[IDX2]], align 4 +; CHECK-NEXT: store i32 [[OP3]], i32* [[IDX3]], align 4 +; CHECK-NEXT: store i32 [[OP4]], i32* [[IDX4]], align 4 ; CHECK-NEXT: ret void ; %idx1 = getelementptr inbounds i32, i32* %x, i64 0 Index: test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -14,30 +14,28 @@ ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> undef, i16 [[TMP]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x 
i16> [[TMP0]], i16 undef, i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> undef, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <2 x i32> , [[REORDER_SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef -; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE8]], -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 undef, undef -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 undef, i32 undef -; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], undef -; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], undef -; CHECK-NEXT: [[RDX_SHUF9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[RDX_MINMAX_CMP10:%.*]] = icmp sgt <4 x i32> [[TMP5]], [[RDX_SHUF9]] -; CHECK-NEXT: [[RDX_MINMAX_SELECT11:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP10]], <4 x i32> [[TMP5]], <4 x i32> [[RDX_SHUF9]] -; CHECK-NEXT: [[RDX_SHUF12:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT11]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[RDX_MINMAX_CMP13:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT11]], [[RDX_SHUF12]] -; CHECK-NEXT: [[RDX_MINMAX_SELECT14:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP13]], <4 x i32> [[RDX_MINMAX_SELECT11]], <4 x i32> [[RDX_SHUF12]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT14]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; CHECK-NEXT: [[TMP19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = sub nsw i32 undef, [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], undef +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = sub 
nsw i32 63, [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], undef +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], undef +; CHECK-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP6]], 15 +; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP6]], 31 +; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = add nsw i32 [[TMP6]], 47 +; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = select i1 undef, i32 [[TMP18]], i32 undef ; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], 63 -; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <2 x i32> undef, [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = sub <2 x i32> [[TMP5]], undef +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 undef, undef ; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 undef, i32 undef ; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP27]], undef @@ -53,23 +51,23 @@ ; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 undef, undef ; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 undef, i32 undef ; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], [[TMP39]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP9]], [[RDX_SHUF]] 
-; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP9]], <4 x i32> [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP7]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP7]], <4 x i32> [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp slt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[TMP10]], undef -; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i32 [[OP_EXTRA]], undef -; CHECK-NEXT: [[OP_EXTRA4:%.*]] = select i1 [[TMP12]], i32 [[OP_EXTRA]], i32 undef -; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[OP_EXTRA4]], undef -; CHECK-NEXT: [[OP_EXTRA5:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA4]], i32 undef -; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i32 [[OP_EXTRA5]], undef -; CHECK-NEXT: [[OP_EXTRA6:%.*]] = select i1 [[TMP14]], i32 [[OP_EXTRA5]], i32 undef -; CHECK-NEXT: [[TMP15:%.*]] = icmp slt i32 [[OP_EXTRA6]], undef -; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[TMP15]], i32 [[OP_EXTRA6]], i32 undef +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], undef +; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[OP_EXTRA]], undef +; CHECK-NEXT: [[OP_EXTRA4:%.*]] = select i1 [[TMP10]], i32 [[OP_EXTRA]], i32 undef +; CHECK-NEXT: [[TMP11:%.*]] = icmp slt 
i32 [[OP_EXTRA4]], undef +; CHECK-NEXT: [[OP_EXTRA5:%.*]] = select i1 [[TMP11]], i32 [[OP_EXTRA4]], i32 undef +; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i32 [[OP_EXTRA5]], undef +; CHECK-NEXT: [[OP_EXTRA6:%.*]] = select i1 [[TMP12]], i32 [[OP_EXTRA5]], i32 undef +; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[OP_EXTRA6]], undef +; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA6]], i32 undef ; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP39]], i32 [[TMP42]] ; CHECK-NEXT: [[TMP45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA7]] ; CHECK-NEXT: unreachable