diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1608,16 +1608,50 @@
   };
 
   if (ST->hasSSE2()) {
-    bool IsLoad =
-        llvm::any_of(Args, [](const auto &V) { return isa<LoadInst>(V); });
-    if (ST->hasSSE3() && IsLoad)
-      if (const auto *Entry =
-              CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
-        assert(isLegalBroadcastLoad(BaseTp->getElementType(),
-                                    LT.second.getVectorElementCount()) &&
-               "Table entry missing from isLegalBroadcastLoad()");
-        return LT.first * Entry->Cost;
+    if (bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0])) {
+      const LoadInst *L = cast<LoadInst>(Args[0]);
+      // There are two use cases:
+      // Case 1 (scalar):
+      //   %ld = load double ...
+      //   %add1 = fadd double %ld
+      //   %add2 = fadd double %ld
+      //  This is typically used by SLP.
+      //
+      // Case 2 (vector):
+      //   %ld = load double ...
+      //   %ins = insertelement <2 x double> undef, double %ld, i32 0
+      //   %shf = shufflevector <2 x double> %ins, ...
+      //  This is the canonicalized form of the broadcast, so we are only
+      //  matching this one and not multiple insertelements.
+      //
+      // In order to tell which case we are in we are checking the users of the
+      // load. If the users are all scalar, then we are in Case 1. Else if there
+      // is at least one vector user we are in Case 2.
+      //
+      // In Case 1 we cannot be sure that all the users will be converted to
+      // vectors, or that there won't be any other external user of the load.
+      // This is up to the caller. So we eagerly consider that codegen will be
+      // able to combine load + broadcast.
+      //
+      // In Case 2 we need to make sure that the load has a single user.
+      // Otherwise codegen won't be able to combine load + broadcast.
+
+      // Limit the users we visit to save compilation time.
+      if (!L->hasNUsesOrMore(16)) {
+        bool IsVectorCase = llvm::any_of(L->users(), [](const User *U) {
+          return cast<Instruction>(U)->getType()->isVectorTy();
+        });
+        bool LoadCanBeCombined = IsVectorCase ? L->hasOneUse() : true;
+        if (ST->hasSSE3() && LoadCanBeCombined)
+          if (const auto *Entry =
+                  CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
+            assert(isLegalBroadcastLoad(BaseTp->getElementType(),
+                                        LT.second.getVectorElementCount()) &&
+                   "Table entry missing from isLegalBroadcastLoad()");
+            return LT.first * Entry->Cost;
+          }
       }
+    }
 
     if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
       return LT.first * Entry->Cost;
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
--- a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
@@ -471,3 +471,49 @@
 
   ret void
 }
+
+; Checks the cost of a load+broadcast that cannot be combined because the load has multiple uses.
+define void @multiple_uses() {
+; SSE-LABEL: 'multiple_uses'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE2-LABEL: 'multiple_uses'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE3-LABEL: 'multiple_uses'
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX-LABEL: 'multiple_uses'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'multiple_uses'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX512-LABEL: 'multiple_uses'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+bb1:
+  %ld_2xf64 = load <2 x double>, ptr undef
+  ; Load has multiple uses
+  %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+  %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+  ret void
+}