diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1120,6 +1120,13 @@
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       const Instruction *I = nullptr) const;
 
+  /// \return The cost of VP Load and Store instructions.
+  InstructionCost
+  getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+                    unsigned AddressSpace,
+                    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+                    const Instruction *I = nullptr) const;
+
   /// \return The cost of masked Load and Store instructions.
   InstructionCost getMaskedMemoryOpCost(
       unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
@@ -1641,6 +1648,11 @@
                                           unsigned AddressSpace,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) = 0;
+  virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
+                                            Align Alignment,
+                                            unsigned AddressSpace,
+                                            TTI::TargetCostKind CostKind,
+                                            const Instruction *I) = 0;
   virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                                 Align Alignment,
                                                 unsigned AddressSpace,
@@ -2144,6 +2156,13 @@
     return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                 CostKind, I);
   }
+  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+                                    unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
+                                    const Instruction *I) override {
+    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                  CostKind, I);
+  }
   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                         Align Alignment, unsigned AddressSpace,
                                         TTI::TargetCostKind CostKind) override {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -551,6 +551,13 @@
     return 1;
   }
 
+  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+                                    unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
+                                    const Instruction *I) const {
+    return 1;
+  }
+
   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                         Align Alignment, unsigned AddressSpace,
                                         TTI::TargetCostKind CostKind) const {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -137,6 +137,15 @@
   bool areFunctionArgsABICompatible(const Function *Caller,
                                     const Function *Callee,
                                     SmallPtrSetImpl<Argument *> &Args) const;
+  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+                                    unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
+                                    const Instruction *I = nullptr);
+
+private:
+  // The following constant is used for estimating costs on power9.
+  static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
+
   /// @}
 };
 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1337,3 +1337,78 @@
 
   return false;
 }
+
+InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
+                                              Align Alignment,
+                                              unsigned AddressSpace,
+                                              TTI::TargetCostKind CostKind,
+                                              const Instruction *I) {
+  InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
+                                                  AddressSpace, CostKind, I);
+  if (TLI->getValueType(DL, Src, true) == MVT::Other)
+    return Cost;
+  // TODO: Handle other cost kinds.
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return Cost;
+
+  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+         "Invalid Opcode");
+  bool IsLoad = (Opcode == Instruction::Load);
+
+  auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
+  assert(SrcVTy && "Expected a vector type for VP memory operations");
+
+  if (getVPLegalizationStrategy(*dyn_cast<VPIntrinsic>(I)).shouldDoNothing()) {
+    std::pair<InstructionCost, MVT> LT =
+        TLI->getTypeLegalizationCost(DL, SrcVTy);
+    InstructionCost Cost = vectorCostAdjustment(LT.first, Opcode, Src, nullptr);
+
+    // On P9 but not on P10, if the op is misaligned then it will cause a
+    // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
+    // ones.
+    if (Alignment >= 16 || ST->getCPUDirective() != PPC::DIR_PWR9)
+      return Cost;
+
+    // We assume the average case: that ops with alignment <= 128
+    // will flush a full pipeline about half the time.
+    // The cost when this happens is about 80 cycles.
+    return P9PipelineFlushEstimate / 2;
+  }
+
+  // Usually we should not get to this point, but the following is an attempt to
+  // model the cost of legalization. Currently we can only lower intrinsics with
+  // evl but no mask, on Power 9/10. Otherwise, we must scalarize. We need to
+  // extract (from the mask) the most/least significant byte of all halfwords
+  // aligned with vector elements, and do an access predicated on its 0th bit.
+  // We make the simplifying assumption that byte-extraction costs are
+  // stride-invariant, so we model the extraction as scalarizing a load of
+  // <NumElems x i8>.
+
+  // VSX masks have lanes per bit, but the predication is per halfword.
+  unsigned NumElems = SrcVTy->getNumElements();
+  auto *MaskI8Ty = Type::getInt8Ty(SrcVTy->getContext());
+  InstructionCost MaskSplitCost = getScalarizationOverhead(
+      FixedVectorType::get(MaskI8Ty, NumElems), false, true);
+  const InstructionCost ScalarCompareCostInstrCost =
+      getCmpSelInstrCost(Instruction::ICmp, MaskI8Ty, nullptr,
+                         CmpInst::BAD_ICMP_PREDICATE, CostKind);
+
+  assert(ScalarCompareCostInstrCost.isValid() &&
+         "Expected valid instruction cost");
+  int ScalarCompareCost = *(ScalarCompareCostInstrCost.getValue());
+
+  const InstructionCost BranchInstrCost =
+      getCFInstrCost(Instruction::Br, CostKind);
+  assert(BranchInstrCost.isValid() && "Expected valid instruction cost");
+  int BranchCost = *BranchInstrCost.getValue();
+  int MaskCmpCost = NumElems * (BranchCost + ScalarCompareCost);
+
+  InstructionCost ValueSplitCost =
+      getScalarizationOverhead(SrcVTy, IsLoad, !IsLoad);
+  const InstructionCost ScalarMemOpInstrCost =
+      NumElems * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
+                                        Alignment, AddressSpace, CostKind);
+  assert(ScalarMemOpInstrCost.isValid() && "Expected valid instruction cost");
+  int ScalarMemOpCost = *(ScalarMemOpInstrCost.getValue());
+  return ScalarMemOpCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
+}