diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1079,6 +1079,11 @@
                                  VectorType *SubTp = nullptr,
                                  ArrayRef<const Value *> Args = None) const;
 
+  /// \returns The cost of an add-sub instruction. This corresponds to a
+  /// shuffle + fadd + fsub pattern in IR.
+  InstructionCost getAltInstrCost(VectorType *VecTy, ArrayRef<int> ShuffleMask,
+                                  ArrayRef<const Value *> Args) const;
+
   /// Represents a hint about the context in which a cast is used.
   ///
   /// For zext/sext, the context of the cast is the operand, which must be a
@@ -1700,6 +1705,9 @@
                                          ArrayRef<int> Mask, int Index,
                                          VectorType *SubTp,
                                          ArrayRef<const Value *> Args) = 0;
+  virtual InstructionCost
+  getAltInstrCost(VectorType *VecTy, ArrayRef<int> ShuffleMask,
+                  ArrayRef<const Value *> Args) const = 0;
   virtual InstructionCost
   getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH,
                    TTI::TargetCostKind CostKind,
@@ -2237,6 +2245,11 @@
                                  ArrayRef<const Value *> Args) override {
     return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp, Args);
   }
+  InstructionCost
+  getAltInstrCost(VectorType *VecTy, ArrayRef<int> ShuffleMask,
+                  ArrayRef<const Value *> Args) const override {
+    return Impl.getAltInstrCost(VecTy, ShuffleMask, Args);
+  }
   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                    CastContextHint CCH,
                                    TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -503,6 +503,11 @@
     return 1;
   }
 
+  InstructionCost getAltInstrCost(VectorType *VecTy, ArrayRef<int> ShuffleMask,
+                                  ArrayRef<const Value *> Args) const {
+    return 1;
+  }
+
   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                    TTI::CastContextHint CCH,
                                    TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -765,6 +765,15 @@
   return Cost;
 }
 
+InstructionCost
+TargetTransformInfo::getAltInstrCost(VectorType *VecTy,
+                                     ArrayRef<int> ShuffleMask,
+                                     ArrayRef<const Value *> Args) const {
+  InstructionCost Cost = TTIImpl->getAltInstrCost(VecTy, ShuffleMask, Args);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
 TTI::CastContextHint
 TargetTransformInfo::getCastContextHint(const Instruction *I) {
   if (!I)
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -138,6 +138,8 @@
                                  ArrayRef<int> Mask, int Index,
                                  VectorType *SubTp,
                                  ArrayRef<const Value *> Args = None);
+  InstructionCost getAltInstrCost(VectorType *VecTy, ArrayRef<int> ShuffleMask,
+                                  ArrayRef<const Value *> Args) const;
   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                    TTI::CastContextHint CCH,
                                    TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1638,6 +1638,34 @@
   return BaseT::getShuffleCost(Kind, BaseTp, Mask, Index, SubTp);
 }
 
+InstructionCost
+X86TTIImpl::getAltInstrCost(VectorType *VecTy, ArrayRef<int> ShuffleMask,
+                            ArrayRef<const Value *> Args) const {
+  assert(cast<FixedVectorType>(VecTy)->getNumElements() ==
+             ShuffleMask.size() &&
+         "Expected same number of lanes");
+  assert(Args.size() == 2 && isa<Instruction>(Args[0]) &&
+         isa<Instruction>(Args[1]) && "Not an AddSub instruction");
+#ifndef NDEBUG
+  // For the add-sub pattern, the opcodes of the two operands and the shuffle
+  // mask must form a pattern that isLegalAltInstr() accepts:
+  unsigned Opcode0 = cast<Instruction>(Args[0])->getOpcode();
+  unsigned Opcode1 = cast<Instruction>(Args[1])->getOpcode();
+  SmallBitVector OpcodeMask(ShuffleMask.size(), false);
+  unsigned NumLanes = ShuffleMask.size();
+  for (unsigned Lane : seq<unsigned>(0, NumLanes))
+    if (ShuffleMask[Lane] >= (int)NumLanes)
+      OpcodeMask.set(Lane);
+  assert(isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask) &&
+         "We can only accept patterns matched by isLegalAltInstr()");
+#endif
+  // AVX2/AVX imply SSE3, so query the wider feature sets first; otherwise
+  // the SSE3 check would shadow them.
+  if (ST->hasAVX2())
+    return 1; // 8xf32 vaddsubps, 4xf64 vaddsubpd
+  if (ST->hasAVX())
+    return 1; // 4xf32 vaddsubps, 2xf64 vaddsubpd
+  if (ST->hasSSE3())
+    return 2; // 2xf32 addsubps, 2xf64 addsubpd
+  llvm_unreachable("Should have been rejected by isLegalAltInstr()");
+}
+
 InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                              Type *Src,
                                              TTI::CastContextHint CCH,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -939,6 +939,10 @@
   /// insertelement nodes, otherwise skip them.
   Optional<OrdersType> getReorderingData(const TreeEntry &TE, bool TopToBottom);
 
+  /// \returns true if \p TE is an alt-shuffle that can be lowered to a single
+  /// instruction. An example of this is the X86 addsub instruction.
+  bool isAltShuffleThatLowersToOneInstr(const TreeEntry *TE) const;
+
   /// Reorders the current graph to the most profitable order starting from the
   /// root node to the leaf nodes. The best order is chosen only from the nodes
   /// of the same size (vectorization factor). Smaller nodes are considered
@@ -3601,6 +3605,22 @@
   return None;
 }
 
+bool BoUpSLP::isAltShuffleThatLowersToOneInstr(const TreeEntry *TE) const {
+  if (!TE->isAltShuffle())
+    return false;
+  VectorType *VecTy =
+      FixedVectorType::get(TE->Scalars[0]->getType(), TE->Scalars.size());
+  unsigned Opcode0 = TE->getOpcode();
+  unsigned Opcode1 = TE->getAltOpcode();
+  // The opcode mask selects between the two opcodes.
+  SmallBitVector OpcodeMask(TE->Scalars.size(), false);
+  for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size()))
+    if (cast<Instruction>(TE->Scalars[Lane])->getOpcode() == Opcode1)
+      OpcodeMask.set(Lane);
+  // The pattern lowers to one instruction only if the target supports it.
+  return TTI->isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
+}
+
 void BoUpSLP::reorderTopToBottom() {
   // Maps VF to the graph nodes.
   DenseMap<unsigned, SetVector<TreeEntry *>> VFToOrderedEntries;
@@ -3633,21 +3653,9 @@
     // Patterns like [fadd,fsub] can be combined into a single instruction in
     // x86. Reordering them into [fsub,fadd] blocks this pattern. So we need
     // to take into account their order when looking for the most used order.
-    if (TE->isAltShuffle()) {
-      VectorType *VecTy =
-          FixedVectorType::get(TE->Scalars[0]->getType(), TE->Scalars.size());
-      unsigned Opcode0 = TE->getOpcode();
-      unsigned Opcode1 = TE->getAltOpcode();
-      // The opcode mask selects between the two opcodes.
-      SmallBitVector OpcodeMask(TE->Scalars.size(), 0);
-      for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size()))
-        if (cast<Instruction>(TE->Scalars[Lane])->getOpcode() == Opcode1)
-          OpcodeMask.set(Lane);
-      // If this pattern is supported by the target then we consider the order.
-      if (TTI->isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
-        VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
-        AltShufflesToOrders.try_emplace(TE.get(), OrdersType());
-      }
+    if (isAltShuffleThatLowersToOneInstr(TE.get())) {
+      VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+      AltShufflesToOrders.try_emplace(TE.get(), OrdersType());
       // TODO: Check the reverse order too.
     }
@@ -6267,8 +6275,10 @@
         ScalarCost += TTI->getInstructionCost(I, CostKind);
       }
       // VecCost is equal to sum of the cost of creating 2 vectors
-      // and the cost of creating shuffle.
+      // and the cost of creating the shuffle, except if this is an alternate
+      // sequence that can be lowered to a single instruction, like x86 addsub.
      InstructionCost VecCost = 0;
+      bool LowersToOneAltInstr = isAltShuffleThatLowersToOneInstr(E);
      // Try to find the previous shuffle node with the same operands and same
      // main/alternate ops.
      auto &&TryFindNodeWithEqualOperands = [this, E]() {
@@ -6293,9 +6303,12 @@
         // No need to add new vector costs here since we're going to reuse
         // same main/alternate vector ops, just do different shuffling.
       } else if (Instruction::isBinaryOp(E->getOpcode())) {
-        VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
-        VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
-                                               CostKind);
+        if (!LowersToOneAltInstr) {
+          VecCost =
+              TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
+          VecCost +=
+              TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
+        }
       } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
         VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
                                           Builder.getInt1Ty(),
@@ -6315,10 +6328,7 @@
                                   TTI::CastContextHint::None, CostKind);
       }
-      if (E->ReuseShuffleIndices.empty()) {
-        CommonCost =
-            TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy);
-      } else {
+      if (LowersToOneAltInstr) {
         SmallVector<int> Mask;
         buildShuffleEntryMask(
             E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
             [E](Instruction *I) {
               assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
               return I->getOpcode() == E->getAltOpcode();
             },
             Mask);
@@ -6327,8 +6337,25 @@
-        CommonCost = TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
-                                         FinalVecTy, Mask);
+        const Value *Args[] = {E->getMainOp(), E->getAltOp()};
+        CommonCost = TTI->getAltInstrCost(FinalVecTy, Mask, Args);
+      } else {
+        if (E->ReuseShuffleIndices.empty()) {
+          CommonCost =
+              TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy);
+        } else {
+          SmallVector<int> Mask;
+          buildShuffleEntryMask(
+              E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
+              [E](Instruction *I) {
+                assert(E->isOpcodeOrAlt(I) &&
+                       "Unexpected main/alternate opcode");
+                return I->getOpcode() == E->getAltOpcode();
+              },
+              Mask);
+          CommonCost = TTI->getShuffleCost(
+              TargetTransformInfo::SK_PermuteTwoSrc, FinalVecTy, Mask);
+        }
       }
       LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
       return CommonCost + VecCost - ScalarCost;
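
Note on the pattern being costed (editor's addendum, not part of the patch):
the new hook charges for the shuffle + fadd + fsub idiom that SLP builds for
alternating add/sub lanes. A minimal hand-written LLVM IR sketch follows; the
function name and vector shapes are illustrative, not taken from the patch or
its tests:

  define <4 x float> @addsub(<4 x float> %a, <4 x float> %b) {
    %add = fadd <4 x float> %a, %b
    %sub = fsub <4 x float> %a, %b
    ; Mask entries < 4 select fsub lanes, entries >= 4 select fadd lanes;
    ; the "ShuffleMask[Lane] >= NumLanes" test in X86TTIImpl::getAltInstrCost
    ; derives OpcodeMask from exactly these entries.
    %r = shufflevector <4 x float> %sub, <4 x float> %add,
                       <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    ret <4 x float> %r
  }

On SSE3-and-later subtargets this whole sequence selects to a single
addsubps/vaddsubps, which is why the patch skips the two
getArithmeticInstrCost() calls and charges only getAltInstrCost() for such
tree entries.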