diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -137,6 +137,17 @@ bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl &Args) const; + bool hasActiveVectorLength(unsigned Opcode, Type *DataType, + Align Alignment) const; + InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); + +private: + // The following constant is used for estimating costs on power9. + static const InstructionCost::CostType P9PipelineFlushEstimate = 80; + /// @} }; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1388,3 +1388,86 @@ return false; } + +bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType, + Align Alignment) const { + // Only load and stores instructions can have variable vector length on Power. + if (Opcode != Instruction::Load && Opcode != Instruction::Store) + return false; + // Loads/stores with length instructions use bits 0-7 of the GPR operand and + // therefore cannot be used in 32-bit mode. + if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64()) + return false; + if (auto *VecTy = dyn_cast(DataType)) { + unsigned VecWidth = DataType->getPrimitiveSizeInBits(); + return VecWidth == 128; + } + Type *ScalarTy = DataType->getScalarType(); + + if (ScalarTy->isPointerTy()) + return true; + + if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) + return true; + + if (!ScalarTy->isIntegerTy()) + return false; + + unsigned IntWidth = ScalarTy->getIntegerBitWidth(); + return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64; +} + +InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) { + InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment, + AddressSpace, CostKind, I); + if (TLI->getValueType(DL, Src, true) == MVT::Other) + return Cost; + // TODO: Handle other cost kinds. + if (CostKind != TTI::TCK_RecipThroughput) + return Cost; + + assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && + "Invalid Opcode"); + + auto *SrcVTy = dyn_cast(Src); + assert(SrcVTy && "Expected a vector type for VP memory operations"); + + if (hasActiveVectorLength(Opcode, Src, Alignment)) { + std::pair LT = + TLI->getTypeLegalizationCost(DL, SrcVTy); + + InstructionCost CostFactor = + vectorCostAdjustmentFactor(Opcode, Src, nullptr); + if (!CostFactor.isValid()) + return InstructionCost::getMax(); + + InstructionCost Cost = LT.first * CostFactor; + assert(Cost.isValid() && "Expected valid cost"); + + // On P9 but not on P10, if the op is misaligned then it will cause a + // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked + // ones. + const Align DesiredAlignment(16); + if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9) + return Cost; + + // Since alignment may be under estimated, we try to compute the probability + // that the actual address is aligned to the desired boundary. For example + // an 8-byte aligned load is assumed to be actually 16-byte aligned half the + // time, while a 4-byte aligned load has a 25% chance of being 16-byte + // aligned. + float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value(); + float MisalignmentProb = 1.0 - AlignmentProb; + return (MisalignmentProb * P9PipelineFlushEstimate) + + (AlignmentProb * *Cost.getValue()); + } + + // Usually we should not get to this point, but the following is an attempt to + // model the cost of legalization. Currently we can only lower intrinsics with + // evl but no mask, on Power 9/10. Otherwise, we must scalarize. + return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); +}