diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1608,16 +1608,19 @@
   };
 
   if (ST->hasSSE2()) {
-    bool IsLoad =
-        llvm::any_of(Args, [](const auto &V) { return isa<LoadInst>(V); });
-    if (ST->hasSSE3() && IsLoad)
+    // A load can be combined with a broadcast only if the broadcast is the
+    // load's single user.
+    const auto *L = Args.empty() ? nullptr : dyn_cast<LoadInst>(Args[0]);
+    const bool LoadCanBeCombined =
+        L && L->hasOneUse() && isa<ShuffleVectorInst>(L->user_back());
+    if (ST->hasSSE3() && LoadCanBeCombined)
       if (const auto *Entry =
               CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
         assert(isLegalBroadcastLoad(BaseTp->getElementType(),
                                     LT.second.getVectorElementCount()) &&
                "Table entry missing from isLegalBroadcastLoad()");
         return LT.first * Entry->Cost;
       }
 
     if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
       return LT.first * Entry->Cost;
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
--- a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
@@ -471,3 +471,49 @@
 
   ret void
 }
+
+; Checks the cost of a load+broadcast that cannot be combined.
+define void @multiple_uses() {
+; SSE-LABEL: 'multiple_uses'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE2-LABEL: 'multiple_uses'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE3-LABEL: 'multiple_uses'
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX-LABEL: 'multiple_uses'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'multiple_uses'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX512-LABEL: 'multiple_uses'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+bb1:
+  %ld_2xf64 = load <2 x double>, ptr undef
+  ; The load has multiple uses: it feeds both shuffles below.
+  %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+  %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+  ret void
+}