Skip to content

Commit 071e822

Browse files
committed Oct 25, 2018
[TTI] Add generic SK_Broadcast shuffle costs
While fixing PR39368 I noticed that we don't have generic shuffle costs for broadcast-style shuffles. This patch adds SK_Broadcast handling to the generic cost model; doing so exposes the ARM and AArch64 targets' lack of handling for this shuffle kind, which is fixed in the same patch. Differential Revision: https://reviews.llvm.org/D53570 llvm-svn: 345253
1 parent 2a9c728 commit 071e822

File tree

3 files changed

+54
-5
lines changed

3 files changed

+54
-5
lines changed
 

‎llvm/include/llvm/CodeGen/BasicTTIImpl.h

+19
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,23 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
8080
using BaseT = TargetTransformInfoImplCRTPBase<T>;
8181
using TTI = TargetTransformInfo;
8282

83+
/// Estimate a cost of Broadcast as an extract and sequence of insert
84+
/// operations.
85+
unsigned getBroadcastShuffleOverhead(Type *Ty) {
  assert(Ty->isVectorTy() && "Can only shuffle vectors");

  // Dispatch through the CRTP-derived implementation so that the per-element
  // costs come from its getVectorInstrCost.
  T *Impl = static_cast<T *>(this);

  // Start with the cost of pulling lane 0 out of the source vector ...
  unsigned Cost =
      Impl->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);

  // ... then add one insert for every lane of the result vector.
  const int NumLanes = Ty->getVectorNumElements();
  for (int Lane = 0; Lane < NumLanes; ++Lane)
    Cost += Impl->getVectorInstrCost(Instruction::InsertElement, Ty, Lane);

  return Cost;
}
99+
83100
/// Estimate a cost of shuffle as a sequence of extract and insert
84101
/// operations.
85102
unsigned getPermuteShuffleOverhead(Type *Ty) {
@@ -554,6 +571,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
554571
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
555572
Type *SubTp) {
556573
switch (Kind) {
574+
case TTI::SK_Broadcast:
575+
return getBroadcastShuffleOverhead(Tp);
557576
case TTI::SK_Select:
558577
case TTI::SK_Reverse:
559578
case TTI::SK_Transpose:

‎llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

+13-2
Original file line numberDiff line numberDiff line change
@@ -946,9 +946,20 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
946946

947947
int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
948948
Type *SubTp) {
949-
if (Kind == TTI::SK_Transpose || Kind == TTI::SK_Select ||
950-
Kind == TTI::SK_PermuteSingleSrc) {
949+
if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
950+
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
951951
static const CostTblEntry ShuffleTbl[] = {
952+
// Broadcast shuffle kinds can be performed with 'dup'.
953+
{ TTI::SK_Broadcast, MVT::v8i8, 1 },
954+
{ TTI::SK_Broadcast, MVT::v16i8, 1 },
955+
{ TTI::SK_Broadcast, MVT::v4i16, 1 },
956+
{ TTI::SK_Broadcast, MVT::v8i16, 1 },
957+
{ TTI::SK_Broadcast, MVT::v2i32, 1 },
958+
{ TTI::SK_Broadcast, MVT::v4i32, 1 },
959+
{ TTI::SK_Broadcast, MVT::v2i64, 1 },
960+
{ TTI::SK_Broadcast, MVT::v2f32, 1 },
961+
{ TTI::SK_Broadcast, MVT::v4f32, 1 },
962+
{ TTI::SK_Broadcast, MVT::v2f64, 1 },
952963
// Transpose shuffle kinds can be performed with 'trn1/trn2' and
953964
// 'zip1/zip2' instructions.
954965
{ TTI::SK_Transpose, MVT::v8i8, 1 },

‎llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

+22-3
Original file line numberDiff line numberDiff line change
@@ -400,10 +400,29 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
400400

401401
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
402402
Type *SubTp) {
403-
// We only handle costs of reverse and select shuffles for now.
404-
if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Select)
405-
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
403+
if (Kind == TTI::SK_Broadcast) {
404+
static const CostTblEntry NEONDupTbl[] = {
405+
// VDUP handles these cases.
406+
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
407+
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
408+
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
409+
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
410+
{ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
411+
{ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
412+
413+
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
414+
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
415+
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
416+
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
406417

418+
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
419+
420+
if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
421+
LT.second))
422+
return LT.first * Entry->Cost;
423+
424+
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
425+
}
407426
if (Kind == TTI::SK_Reverse) {
408427
static const CostTblEntry NEONShuffleTbl[] = {
409428
// Reverse shuffle cost one instruction if we are shuffling within a

0 commit comments

Comments
 (0)
Please sign in to comment.