Index: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h @@ -80,6 +80,23 @@ using BaseT = TargetTransformInfoImplCRTPBase; using TTI = TargetTransformInfo; + /// Estimate a cost of Broadcast as an extract and sequence of insert + /// operations. + unsigned getBroadcastShuffleOverhead(Type *Ty) { + assert(Ty->isVectorTy() && "Can only shuffle vectors"); + unsigned Cost = 0; + // Broadcast cost is equal to the cost of extracting the zero'th element + // plus the cost of inserting it into every element of the result vector. + Cost += static_cast(this)->getVectorInstrCost( + Instruction::ExtractElement, Ty, 0); + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + Cost += static_cast(this)->getVectorInstrCost( + Instruction::InsertElement, Ty, i); + } + return Cost; + } + /// Estimate a cost of shuffle as a sequence of extract and insert /// operations. unsigned getPermuteShuffleOverhead(Type *Ty) { @@ -554,6 +571,8 @@ unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { switch (Kind) { + case TTI::SK_Broadcast: + return getBroadcastShuffleOverhead(Tp); case TTI::SK_Select: case TTI::SK_Reverse: case TTI::SK_Transpose: Index: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -946,9 +946,20 @@ int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - if (Kind == TTI::SK_Transpose || Kind == TTI::SK_Select || - Kind == TTI::SK_PermuteSingleSrc) { + if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose || + Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) { static const CostTblEntry ShuffleTbl[] = { + // Broadcast shuffle kinds can be performed with 'dup'. + { TTI::SK_Broadcast, MVT::v8i8, 1 }, + { TTI::SK_Broadcast, MVT::v16i8, 1 }, + { TTI::SK_Broadcast, MVT::v4i16, 1 }, + { TTI::SK_Broadcast, MVT::v8i16, 1 }, + { TTI::SK_Broadcast, MVT::v2i32, 1 }, + { TTI::SK_Broadcast, MVT::v4i32, 1 }, + { TTI::SK_Broadcast, MVT::v2i64, 1 }, + { TTI::SK_Broadcast, MVT::v2f32, 1 }, + { TTI::SK_Broadcast, MVT::v4f32, 1 }, + { TTI::SK_Broadcast, MVT::v2f64, 1 }, // Transpose shuffle kinds can be performed with 'trn1/trn2' and // 'zip1/zip2' instructions. { TTI::SK_Transpose, MVT::v8i8, 1 }, Index: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -400,10 +400,29 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - // We only handle costs of reverse and select shuffles for now. - if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Select) - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); + if (Kind == TTI::SK_Broadcast) { + static const CostTblEntry NEONDupTbl[] = { + // VDUP handles these cases. + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}}; + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + + if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, + LT.second)) + return LT.first * Entry->Cost; + + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); + } if (Kind == TTI::SK_Reverse) { static const CostTblEntry NEONShuffleTbl[] = { // Reverse shuffle cost one instruction if we are shuffling within a