Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -50,7 +50,7 @@
 /// and if one of the extracts should be transformed to a shufflevector, set
 /// \p ConvertToShuffle to that extract instruction.
 static bool isExtractExtractCheap(Instruction *Ext0, Instruction *Ext1,
-                                  unsigned Opcode,
+                                  const Instruction &OpInst,
                                   const TargetTransformInfo &TTI,
                                   Instruction *&ConvertToShuffle,
                                   unsigned PreferredExtractIndex) {
@@ -62,6 +62,7 @@
   int ScalarOpCost, VectorOpCost;
 
   // Get cost estimates for scalar and vector versions of the operation.
+  unsigned Opcode = OpInst.getOpcode();
   bool IsBinOp = Instruction::isBinaryOp(Opcode);
   if (IsBinOp) {
     ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
@@ -79,6 +80,8 @@
   // both sequences.
   unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
   unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
+  Value *Ext0Vector = Ext0->getOperand(0);
+  Value *Ext1Vector = Ext1->getOperand(0);
 
   int Extract0Cost =
       TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
@@ -97,7 +100,7 @@
   // Extra uses of the extracts mean that we include those costs in the
   // vector total because those instructions will not be eliminated.
   int OldCost, NewCost;
-  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
+  if (Ext0Vector == Ext1Vector && Ext0Index == Ext1Index) {
     // Handle a special case. If the 2 extracts are identical, adjust the
     // formulas to account for that. The extra use charge allows for either the
     // CSE'd pattern or an unoptimized form with identical values:
@@ -118,35 +121,52 @@
   if (Ext0Index == Ext1Index) {
     // If the extract indexes are identical, no shuffle is needed.
     ConvertToShuffle = nullptr;
-  } else {
-    if (IsBinOp && DisableBinopExtractShuffle)
-      return true;
-
-    // If we are extracting from 2 different indexes, then one operand must be
-    // shuffled before performing the vector operation. The shuffle mask is
-    // undefined except for 1 lane that is being translated to the remaining
-    // extraction lane. Therefore, it is a splat shuffle. Ex:
-    // ShufMask = { undef, undef, 0, undef }
-    // TODO: The cost model has an option for a "broadcast" shuffle
-    // (splat-from-element-0), but no option for a more general splat.
-    NewCost +=
-        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
-
-    // The more expensive extract will be replaced by a shuffle. If the costs
-    // are equal and there is a preferred extract index, shuffle the opposite
-    // operand. Otherwise, replace the extract with the higher index.
-    if (Extract0Cost > Extract1Cost)
-      ConvertToShuffle = Ext0;
-    else if (Extract1Cost > Extract0Cost)
-      ConvertToShuffle = Ext1;
-    else if (PreferredExtractIndex == Ext0Index)
-      ConvertToShuffle = Ext1;
-    else if (PreferredExtractIndex == Ext1Index)
-      ConvertToShuffle = Ext0;
-    else
-      ConvertToShuffle = Ext0Index > Ext1Index ? Ext0 : Ext1;
+    return OldCost < NewCost;
   }
+
+  if (Ext0Vector == Ext1Vector && OpInst.hasOneUse()) {
+    // If this matches a reduction pattern, do nothing.
+    const auto *ReductionInst = OpInst.user_back();
+    // The user instruction must be the same operation.
+    if (ReductionInst->getOpcode() == Opcode) {
+      assert(ReductionInst->getNumOperands() == 2 &&
+             "Expected a binary operation or compare");
+      Value *SecondOp = ReductionInst->getOperand(0) == &OpInst
+                            ? ReductionInst->getOperand(1)
+                            : ReductionInst->getOperand(0);
+      auto *Ext2 = dyn_cast<ExtractElementInst>(SecondOp);
+      if (Ext2 && Ext2->getOperand(0) == Ext0Vector)
+        return true;
+    }
+  }
+
+  if (IsBinOp && DisableBinopExtractShuffle)
+    return true;
+
+  // If we are extracting from 2 different indexes, then one operand must be
+  // shuffled before performing the vector operation. The shuffle mask is
+  // undefined except for 1 lane that is being translated to the remaining
+  // extraction lane. Therefore, it is a splat shuffle. Ex:
+  // ShufMask = { undef, undef, 0, undef }
+  // TODO: The cost model has an option for a "broadcast" shuffle
+  // (splat-from-element-0), but no option for a more general splat.
+  NewCost +=
+      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+
+  // The more expensive extract will be replaced by a shuffle. If the costs
+  // are equal and there is a preferred extract index, shuffle the opposite
+  // operand. Otherwise, replace the extract with the higher index.
+  if (Extract0Cost > Extract1Cost)
+    ConvertToShuffle = Ext0;
+  else if (Extract1Cost > Extract0Cost)
+    ConvertToShuffle = Ext1;
+  else if (PreferredExtractIndex == Ext0Index)
+    ConvertToShuffle = Ext1;
+  else if (PreferredExtractIndex == Ext1Index)
+    ConvertToShuffle = Ext0;
+  else
+    ConvertToShuffle = Ext0Index > Ext1Index ? Ext0 : Ext1;
 
   // Aggressively form a vector op if the cost is equal because the transform
   // may enable further optimization.
   // Codegen can reverse this transform (scalarize) if it was not profitable.
@@ -226,8 +246,7 @@
                        m_ConstantInt(InsertIndex)));
 
   Instruction *ConvertToShuffle;
-  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), TTI, ConvertToShuffle,
-                            InsertIndex))
+  if (isExtractExtractCheap(Ext0, Ext1, I, TTI, ConvertToShuffle, InsertIndex))
     return false;
 
   if (ConvertToShuffle) {
Index: llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
===================================================================
--- llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
+++ llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
@@ -486,3 +486,23 @@
   %v3 = insertelement <4 x float> %v2, float %b23, i32 3
   ret <4 x float> %v3
 }
+
+; Negative test - SIMD reduction operation.
+
+define i32 @ext_ext_reduction(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @ext_ext_reduction(
+; CHECK-NEXT: %and = and <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NOT: [[TMP1:%.*]] = shufflevector <4 x i32> %and, <4 x i32> undef, <4 x i32>
+;
+  %and = and <4 x i32> %x, %y
+  %vecext = extractelement <4 x i32> %and, i32 0
+  %vecext.1 = extractelement <4 x i32> %and, i32 1
+  %add.1 = or i32 %vecext.1, %vecext
+  %vecext.2 = extractelement <4 x i32> %and, i32 2
+  %add.2 = or i32 %vecext.2, %add.1
+  %vecext.3 = extractelement <4 x i32> %and, i32 3
+  %add.3 = or i32 %vecext.3, %add.2
+  ret i32 %add.3
+}
+
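For reference, here is the reduction-step bail-out from the first hunk pulled out as a standalone predicate. This is only an illustrative sketch, not part of the patch: the helper name isLikelyReductionStep is invented here, and in the patch the same logic is inlined into isExtractExtractCheap() behind the Ext0Vector == Ext1Vector guard, so it only fires when both extracts read the same vector, as in the @ext_ext_reduction test above.

// Illustrative sketch (assumed helper, not in VectorCombine.cpp): return true
// when OpInst looks like one step of a scalarized vector reduction, i.e. its
// sole user repeats the same opcode and consumes another extract of SrcVec.
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool isLikelyReductionStep(const Instruction &OpInst,
                                  const Value *SrcVec) {
  if (!OpInst.hasOneUse())
    return false;
  const Instruction *UserI = OpInst.user_back();
  // The single user must perform the same operation (e.g. the next 'or' in an
  // or-reduction chain) with exactly two operands.
  if (UserI->getOpcode() != OpInst.getOpcode() || UserI->getNumOperands() != 2)
    return false;
  // Its other operand must be yet another extract from the same source
  // vector: the next lane being folded into the reduction.
  const Value *OtherOp = UserI->getOperand(0) == &OpInst ? UserI->getOperand(1)
                                                         : UserI->getOperand(0);
  const auto *NextExt = dyn_cast<ExtractElementInst>(OtherOp);
  return NextExt && NextExt->getVectorOperand() == SrcVec;
}

When such a chain is detected, the cost query reports the extract/extract pair as cheap (returns true), so the caller leaves the scalarized reduction intact for the backend to match rather than introducing a shuffle in the middle of it.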