diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1602,16 +1602,23 @@
   };
 
   if (ST->hasSSE2()) {
-    bool IsLoad =
-        llvm::any_of(Args, [](const auto &V) { return isa<LoadInst>(V); });
-    if (ST->hasSSE3() && IsLoad)
-      if (const auto *Entry =
-              CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
-        assert(isLegalBroadcastLoad(BaseTp->getElementType(),
-                                    LT.second.getVectorElementCount()) &&
-               "Table entry missing from isLegalBroadcastLoad()");
-        return LT.first * Entry->Cost;
-      }
+    // A load can be folded into the broadcast only if the two are back-to-back
+    // in the code. Only Args[0] is inspected.
+    if (const auto *L = Args.empty() ? nullptr : dyn_cast<LoadInst>(Args[0])) {
+      // Skip debug instructions so that the presence of debug info cannot
+      // change the cost.
+      const Instruction *LNext = L->getNextNonDebugInstruction();
+      bool LoadCanBeCombined =
+          LNext != nullptr && llvm::is_contained(L->users(), LNext);
+      if (ST->hasSSE3() && LoadCanBeCombined)
+        if (const auto *Entry =
+                CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
+          assert(isLegalBroadcastLoad(BaseTp->getElementType(),
+                                      LT.second.getVectorElementCount()) &&
+                 "Table entry missing from isLegalBroadcastLoad()");
+          return LT.first * Entry->Cost;
+        }
+    }
 
     if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
       return LT.first * Entry->Cost;
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
--- a/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-load.ll
@@ -471,3 +471,100 @@
 
   ret void
 }
+
+
+; Load and broadcast separated by another instruction: they cannot be combined.
+define void @not_shuffle_load_not_consecutive() {
+; SSE-LABEL: 'not_shuffle_load_not_consecutive'
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 8
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE2-LABEL: 'not_shuffle_load_not_consecutive'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE3-LABEL: 'not_shuffle_load_not_consecutive'
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX-LABEL: 'not_shuffle_load_not_consecutive'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 8
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'not_shuffle_load_not_consecutive'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 8
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX512-LABEL: 'not_shuffle_load_not_consecutive'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 8
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %ld_2xf64 = load <2 x double>, ptr undef
+
+  ; Some other instruction between the Load and the Broadcast.
+  store double undef, ptr undef
+
+  %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+  ret void
+}
+
+
+; Load and broadcast in different basic blocks: they cannot be combined.
+define void @not_shuffle_load_different_bbs() {
+; SSE-LABEL: 'not_shuffle_load_different_bbs'
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE2-LABEL: 'not_shuffle_load_different_bbs'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE3-LABEL: 'not_shuffle_load_different_bbs'
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX-LABEL: 'not_shuffle_load_different_bbs'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'not_shuffle_load_different_bbs'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX512-LABEL: 'not_shuffle_load_different_bbs'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+bb1:
+  %ld_2xf64 = load <2 x double>, ptr undef
+  br label %bb2
+
+bb2:
+  ; The broadcast lives in a different basic block than the load it uses.
+  %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+  ret void
+}