diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -357,7 +357,7 @@ } unsigned getRegUsageForType(Type *Ty) { - return getTLI()->getTypeLegalizationCost(DL, Ty).first; + return *getTLI()->getTypeLegalizationCost(DL, Ty).first.getValue(); } InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, @@ -750,7 +750,7 @@ Opd1PropInfo, Opd2PropInfo, Args, CxtI); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); bool IsFloat = Ty->isFPOrFPVectorTy(); // Assume that floating point arithmetic operations cost twice as much as @@ -820,8 +820,8 @@ const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - std::pair SrcLT = TLI->getTypeLegalizationCost(DL, Src); - std::pair DstLT = TLI->getTypeLegalizationCost(DL, Dst); + const auto SrcLT = TLI->getTypeLegalizationCost(DL, Src); + const auto DstLT = TLI->getTypeLegalizationCost(DL, Dst); TypeSize SrcSize = SrcLT.second.getSizeInBits(); TypeSize DstSize = DstLT.second.getSizeInBits(); @@ -993,7 +993,7 @@ if (CondTy->isVectorTy()) ISD = ISD::VSELECT; } - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + const auto LT = TLI->getTypeLegalizationCost(DL, ValTy); if (!(ValTy->isVectorTy() && !LT.second.isVector()) && !TLI->isOperationExpand(ISD, LT.second)) { @@ -1023,10 +1023,7 @@ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { - std::pair LT = - getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); - - return LT.first; + return getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()).first; } InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, @@ -1037,7 +1034,7 @@ // Assume types, such as structs, are expensive. if (getTLI()->getValueType(DL, Src, true) == MVT::Other) return 4; - std::pair LT = getTLI()->getTypeLegalizationCost(DL, Src); + const auto LT = getTLI()->getTypeLegalizationCost(DL, Src); // Assuming that all loads of legal types cost 1. InstructionCost Cost = LT.first; @@ -1804,10 +1801,10 @@ } const TargetLoweringBase *TLI = getTLI(); - std::pair LT = TLI->getTypeLegalizationCost(DL, RetTy); + const auto LT = TLI->getTypeLegalizationCost(DL, RetTy); - SmallVector LegalCost; - SmallVector CustomCost; + SmallVector LegalCost; + SmallVector CustomCost; for (unsigned ISD : ISDs) { if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && @@ -1913,8 +1910,7 @@ } unsigned getNumberOfParts(Type *Tp) { - std::pair LT = getTLI()->getTypeLegalizationCost(DL, Tp); - return LT.first; + return *getTLI()->getTypeLegalizationCost(DL, Tp).first.getValue(); } InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, @@ -1981,11 +1977,9 @@ unsigned NumReduxLevels = Log2_32(NumVecElts); InstructionCost ArithCost = 0; InstructionCost ShuffleCost = 0; - std::pair LT = - thisT()->getTLI()->getTypeLegalizationCost(DL, Ty); + MVT MTy = thisT()->getTLI()->getTypeLegalizationCost(DL, Ty).second; unsigned LongVectorCount = 0; - unsigned MVTLen = - LT.second.isVector() ? LT.second.getVectorNumElements() : 1; + unsigned MVTLen = MTy.isVector() ? MTy.getVectorNumElements() : 1; while (NumVecElts > MVTLen) { NumVecElts /= 2; VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); @@ -2037,11 +2031,9 @@ } InstructionCost MinMaxCost = 0; InstructionCost ShuffleCost = 0; - std::pair LT = - thisT()->getTLI()->getTypeLegalizationCost(DL, Ty); + MVT MTy = thisT()->getTLI()->getTypeLegalizationCost(DL, Ty).second; unsigned LongVectorCount = 0; - unsigned MVTLen = - LT.second.isVector() ? LT.second.getVectorNumElements() : 1; + unsigned MVTLen = MTy.isVector() ? MTy.getVectorNumElements() : 1; while (NumVecElts > MVTLen) { NumVecElts /= 2; auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -224,6 +224,9 @@ /// in order to type-legalize it. using LegalizeKind = std::pair; + /// LegalizeCost holds the cost of type-legalization and the legalized type. + using LegalizeCost = std::pair; + /// Enum that describes how the target represents true/false values. enum BooleanContent { UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. @@ -1815,8 +1818,7 @@ int InstructionOpcodeToISD(unsigned Opcode) const; /// Estimate the cost of type-legalization and the legalized type. - std::pair getTypeLegalizationCost(const DataLayout &DL, - Type *Ty) const; + LegalizeCost getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const; /// @} diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1807,13 +1807,13 @@ llvm_unreachable("Unknown instruction type encountered!"); } -std::pair +TargetLoweringBase::LegalizeCost TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const { LLVMContext &C = Ty->getContext(); EVT MTy = getValueType(DL, Ty); - int Cost = 1; + InstructionCost Cost = 1; // We keep legalizing the type until we find a legal kind. We assume that // the only operation that costs anything is the split. After splitting // we need to handle two types. diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -432,8 +432,10 @@ return false; // Get the total number of vector elements in the legalized types. - unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements(); - unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements(); + InstructionCost NumDstEls = + DstTyL.first * DstTyL.second.getVectorMinNumElements(); + InstructionCost NumSrcEls = + SrcTyL.first * SrcTyL.second.getVectorMinNumElements(); // Return true if the legalized types have the same number of vector elements // and the destination element type size is twice that of the source type. @@ -793,7 +795,7 @@ if (Index != -1U) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Val); + const auto LT = TLI->getTypeLegalizationCost(DL, Val); // This type is legalized to a scalar type. if (!LT.second.isVector()) @@ -825,7 +827,7 @@ Opd2PropInfo, Args, CxtI); // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.), // add in the widening overhead specified by the sub-target. Since the @@ -1391,7 +1393,7 @@ assert((isa(Ty) && isa(CondTy)) && "Both vector needs to be scalable"); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); InstructionCost LegalizationCost = 0; if (LT.first > 1) { Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext()); @@ -1413,7 +1415,7 @@ TTI::TargetCostKind CostKind) { assert(!IsPairwise && "Cannot be pair wise to continue"); - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + const auto LT = TLI->getTypeLegalizationCost(DL, ValTy); InstructionCost LegalizationCost = 0; if (LT.first > 1) { Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext()); @@ -1448,7 +1450,7 @@ return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, CostKind); - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + const auto LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -1548,7 +1550,7 @@ { TTI::SK_Reverse, MVT::nxv4i1, 1 }, { TTI::SK_Reverse, MVT::nxv2i1, 1 }, }; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + const auto LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -531,7 +531,7 @@ int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); bool IsFloat = Ty->isFPOrFPVectorTy(); // Assume that floating point arithmetic operations cost twice as much as @@ -569,7 +569,7 @@ } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); // Because we don't have any legal vector operations, but the legal types, we @@ -775,7 +775,7 @@ } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, RetTy); + const auto LT = TLI->getTypeLegalizationCost(DL, RetTy); unsigned NElts = LT.second.isVector() ? LT.second.getVectorNumElements() : 1; @@ -857,7 +857,7 @@ OrigTy.getScalarSizeInBits() != 16) return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise, CostKind); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); return LT.first * getFullRateInstrCost(); } @@ -875,7 +875,7 @@ return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned, CostKind); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); return LT.first * getHalfRateInstrCost(CostKind); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -488,8 +488,7 @@ const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const; - std::pair getTypeLegalizationCost(const DataLayout &DL, - Type *Ty) const; + LegalizeCost getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const; }; } // End namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -12205,10 +12205,10 @@ return hasCFUser(V, Visited, Subtarget->getWavefrontSize()); } -std::pair +TargetLoweringBase::LegalizeCost SITargetLowering::getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const { - auto Cost = TargetLoweringBase::getTypeLegalizationCost(DL, Ty); + LegalizeCost Cost = TargetLoweringBase::getTypeLegalizationCost(DL, Ty); auto Size = DL.getTypeSizeInBits(Ty); // Maximum load or store can handle 8 dwords for scalar and 4 for // vector ALU. Let's assume anything above 8 dwords is expensive diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -559,7 +559,7 @@ {ISD::FP_EXTEND, MVT::v2f32, 2}, {ISD::FP_EXTEND, MVT::v4f32, 4}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); + const auto LT = TLI->getTypeLegalizationCost(DL, Src); if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second)) return AdjustCost(LT.first * Entry->Cost); } @@ -825,9 +825,9 @@ // Integer cross-lane moves are more expensive than float, which can // sometimes just be vmovs. Integer involve being passes to GPR registers, // causing more of a delay. - std::pair LT = - getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType()); - return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1); + InstructionCost Cost = + getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType()).first; + return Cost * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1); } return BaseT::getVectorInstrCost(Opcode, ValTy, Index); @@ -851,7 +851,7 @@ // - may require one or more conditional mov (including an IT), // - can't operate directly on immediates, // - require live flags, which we can't copy around easily. - int Cost = TLI->getTypeLegalizationCost(DL, ValTy).first; + InstructionCost Cost = TLI->getTypeLegalizationCost(DL, ValTy).first; // Possible IT instruction for Thumb2, or more for Thumb1. ++Cost; @@ -928,8 +928,7 @@ return Entry->Cost; } - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); - return LT.first; + return TLI->getTypeLegalizationCost(DL, ValTy).first; } if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() && @@ -952,7 +951,7 @@ I); } - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + const auto LT = TLI->getTypeLegalizationCost(DL, ValTy); int BaseCost = ST->getMVEVectorCostFactor(CostKind); // There are two types - the input that specifies the type of the compare // and the output vXi1 type. Because we don't know how the output will be @@ -1155,8 +1154,7 @@ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - + const auto LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second)) return LT.first * Entry->Cost; @@ -1177,8 +1175,7 @@ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - + const auto LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) return LT.first * Entry->Cost; @@ -1202,7 +1199,7 @@ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + const auto LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) return LT.first * Entry->Cost; @@ -1218,8 +1215,7 @@ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, {ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - + const auto LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE, LT.second)) return LT.first * Entry->Cost * @@ -1227,7 +1223,7 @@ } if (!Mask.empty()) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + const auto LT = TLI->getTypeLegalizationCost(DL, Tp); if (Mask.size() <= LT.second.getVectorNumElements() && (isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) || isVREVMask(Mask, LT.second, 64))) @@ -1263,7 +1259,7 @@ } } - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); if (ST->hasNEON()) { const unsigned FunctionCallDivCost = 20; @@ -1402,8 +1398,7 @@ cast(Src)->getElementType()->isDoubleTy()) { // Unaligned loads/stores are extremely inefficient. // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr. - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); - return LT.first * 4; + return TLI->getTypeLegalizationCost(DL, Src).first * 4; } // MVE can optimize a fpext(load(4xhalf)) using an extending integer load. @@ -1503,14 +1498,14 @@ unsigned NumElems = VTy->getNumElements(); unsigned EltSize = VTy->getScalarSizeInBits(); - std::pair LT = TLI->getTypeLegalizationCost(DL, DataTy); + const auto LT = TLI->getTypeLegalizationCost(DL, DataTy); // For now, it is assumed that for the MVE gather instructions the loads are // all effectively serialised. This means the cost is the scalar cost // multiplied by the number of elements being loaded. This is possibly very // conservative, but even so we still end up vectorising loops because the // cost per iteration for many loops is lower than for scalar loops. - unsigned VectorCost = + InstructionCost VectorCost = NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind); // The scalarization cost should be a lot higher. We use the number of vector // elements plus the scalarization overhead. @@ -1597,7 +1592,7 @@ return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, CostKind); - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + const auto LT = TLI->getTypeLegalizationCost(DL, ValTy); static const CostTblEntry CostTblAdd[]{ {ISD::ADD, MVT::v16i8, 1}, @@ -1618,7 +1613,7 @@ EVT ValVT = TLI->getValueType(DL, ValTy); EVT ResVT = TLI->getValueType(DL, ResTy); if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) { - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + const auto LT = TLI->getTypeLegalizationCost(DL, ValTy); if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) || (LT.second == MVT::v8i16 && ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) || @@ -1651,10 +1646,9 @@ case Intrinsic::usub_sat: { if (!ST->hasMVEIntegerOps()) break; - Type *VT = ICA.getReturnType(); - std::pair LT = - TLI->getTypeLegalizationCost(DL, VT); + Type *VT = ICA.getReturnType(); + const auto LT = TLI->getTypeLegalizationCost(DL, VT); if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 || LT.second == MVT::v16i8) { // This is a base cost of 1 for the vqadd, plus 3 extract shifts if we @@ -1672,9 +1666,8 @@ case Intrinsic::umax: { if (!ST->hasMVEIntegerOps()) break; - Type *VT = ICA.getReturnType(); - std::pair LT = TLI->getTypeLegalizationCost(DL, VT); + const auto LT = TLI->getTypeLegalizationCost(DL, ICA.getReturnType()); if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 || LT.second == MVT::v16i8) return LT.first * ST->getMVEVectorCostFactor(CostKind); @@ -1684,8 +1677,8 @@ case Intrinsic::maxnum: { if (!ST->hasMVEFloatOps()) break; - Type *VT = ICA.getReturnType(); - std::pair LT = TLI->getTypeLegalizationCost(DL, VT); + + const auto LT = TLI->getTypeLegalizationCost(DL, ICA.getReturnType()); if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) return LT.first * ST->getMVEVectorCostFactor(CostKind); break; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -143,8 +143,7 @@ HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { if (ICA.getID() == Intrinsic::bswap) { - std::pair LT = TLI.getTypeLegalizationCost(DL, ICA.getReturnType()); - return LT.first + 2; + return TLI.getTypeLegalizationCost(DL, ICA.getReturnType()).first + 2; } return BaseT::getIntrinsicInstrCost(ICA, CostKind); } @@ -251,9 +250,9 @@ TTI::TargetCostKind CostKind, const Instruction *I) { if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) { - std::pair LT = TLI.getTypeLegalizationCost(DL, ValTy); if (Opcode == Instruction::FCmp) - return LT.first + FloatFactor * getTypeNumElements(ValTy); + return TLI.getTypeLegalizationCost(DL, ValTy).first + + FloatFactor * getTypeNumElements(ValTy); } return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } @@ -271,7 +270,7 @@ Opd2PropInfo, Args, CxtI); if (Ty->isVectorTy()) { - std::pair LT = TLI.getTypeLegalizationCost(DL, Ty); + const auto LT = TLI.getTypeLegalizationCost(DL, Ty); if (LT.second.isFloatingPoint()) return LT.first + FloatFactor * getTypeNumElements(Ty); } @@ -288,9 +287,10 @@ unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0; unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0; - std::pair SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy); - std::pair DstLT = TLI.getTypeLegalizationCost(DL, DstTy); - unsigned Cost = std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN); + InstructionCost SrcCost = TLI.getTypeLegalizationCost(DL, SrcTy).first; + InstructionCost DstCost = TLI.getTypeLegalizationCost(DL, DstTy).first; + InstructionCost Cost = + std::max(SrcCost, DstCost) + FloatFactor * (SrcN + DstN); // TODO: Allow non-throughput costs that aren't binary. if (CostKind != TTI::TCK_RecipThroughput) return Cost == 0 ? 0 : 1; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -375,7 +375,7 @@ TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -328,8 +328,8 @@ if (U->getType()->isVectorTy()) { // Instructions that need to be split should cost more. - std::pair LT = TLI->getTypeLegalizationCost(DL, U->getType()); - return LT.first * BaseT::getUserCost(U, Operands, CostKind); + return TLI->getTypeLegalizationCost(DL, U->getType()).first * + BaseT::getUserCost(U, Operands, CostKind); } return BaseT::getUserCost(U, Operands, CostKind); @@ -946,7 +946,7 @@ if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy()) return Cost; - std::pair LT1 = TLI->getTypeLegalizationCost(DL, Ty1); + const auto LT1 = TLI->getTypeLegalizationCost(DL, Ty1); // If type legalization involves splitting the vector, we don't want to // double the cost at every step - only the last step. if (LT1.first != 1 || !LT1.second.isVector()) @@ -957,7 +957,7 @@ return Cost; if (Ty2) { - std::pair LT2 = TLI->getTypeLegalizationCost(DL, Ty2); + const auto LT2 = TLI->getTypeLegalizationCost(DL, Ty2); if (LT2.first != 1 || !LT2.second.isVector()) return Cost; } @@ -987,16 +987,13 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef Mask, int Index, Type *SubTp) { - // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - // PPC, for both Altivec/VSX, support cheap arbitrary permutations // (at least in the sense that there need only be one non-loop-invariant // instruction). We need one such shuffle instruction for each actual // register (this is not true for arbitrary shuffles, but is true for the // structured types of shuffles covered by TTI::ShuffleKind). - return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp, - nullptr); + return vectorCostAdjustment(TLI->getTypeLegalizationCost(DL, Tp).first, + Instruction::ShuffleVector, Tp, nullptr); } InstructionCost PPCTTIImpl::getCFInstrCost(unsigned Opcode, @@ -1113,7 +1110,7 @@ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); + const auto LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); @@ -1197,9 +1194,6 @@ assert(isa(VecTy) && "Expect a vector type for interleaved memory op"); - // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, VecTy); - // Firstly, the cost of load/store operation. InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind); @@ -1209,7 +1203,7 @@ // instruction). For each result vector, we need one shuffle per incoming // vector (except that the first shuffle can take two incoming vectors // because it does not need to take itself). - Cost += Factor*(LT.first-1); + Cost += Factor * (TLI->getTypeLegalizationCost(DL, VecTy).first - 1); return Cost; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -185,7 +185,7 @@ Op2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -966,7 +966,7 @@ VectorType *SubTp) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are widened to type v4i32. - std::pair LT = TLI->getTypeLegalizationCost(DL, BaseTp); + auto LT = TLI->getTypeLegalizationCost(DL, BaseTp); // Treat Transpose as 2-op shuffles - there's no difference in lowering. if (Kind == TTI::SK_Transpose) @@ -984,7 +984,7 @@ int NumElts = LT.second.getVectorNumElements(); if ((Index % NumElts) == 0) return 0; - std::pair SubLT = TLI->getTypeLegalizationCost(DL, SubTp); + const auto SubLT = TLI->getTypeLegalizationCost(DL, SubTp); if (SubLT.second.isVector()) { int NumSubElts = SubLT.second.getVectorNumElements(); if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) @@ -1030,7 +1030,7 @@ // isn't free, because we need to preserve the rest of the wide vector. if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) { int NumElts = LT.second.getVectorNumElements(); - std::pair SubLT = TLI->getTypeLegalizationCost(DL, SubTp); + const auto SubLT = TLI->getTypeLegalizationCost(DL, SubTp); if (SubLT.second.isVector()) { int NumSubElts = SubLT.second.getVectorNumElements(); if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) @@ -1090,12 +1090,12 @@ // Number of source vectors after legalization: unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize; // Number of destination vectors after legalization: - unsigned NumOfDests = LT.first; + InstructionCost NumOfDests = LT.first; auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(), LegalVT.getVectorNumElements()); - unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests; + InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests; return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, None, 0, nullptr); } @@ -1106,8 +1106,8 @@ // For 2-input shuffles, we must account for splitting the 2 inputs into many. if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) { // We assume that source and destination have the same vector type. - int NumOfDests = LT.first; - int NumOfShufflesPerDest = LT.first * 2 - 1; + InstructionCost NumOfDests = LT.first; + InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1; LT.first = NumOfDests * NumOfShufflesPerDest; } @@ -2021,12 +2021,12 @@ { ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1 }, // PSHUFD }; - std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); - std::pair LTDest = TLI->getTypeLegalizationCost(DL, Dst); + const auto LTSrc = TLI->getTypeLegalizationCost(DL, Src); + MVT MTyDest = TLI->getTypeLegalizationCost(DL, Dst).second; if (ST->hasSSE2() && !ST->hasAVX()) { if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, - LTDest.second, LTSrc.second)) + MTyDest, LTSrc.second)) return AdjustCost(LTSrc.first * Entry->Cost); } @@ -2111,7 +2111,7 @@ I); // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + const auto LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -2801,7 +2801,7 @@ if (ISD != ISD::DELETED_NODE) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, OpTy); + const auto LT = TLI->getTypeLegalizationCost(DL, OpTy); MVT MTy = LT.second; // Attempt to lookup cost. @@ -2821,7 +2821,8 @@ return LT.first * Cost; } - auto adjustTableCost = [](const CostTblEntry &Entry, int LegalizationCost, + auto adjustTableCost = [](const CostTblEntry &Entry, + InstructionCost LegalizationCost, FastMathFlags FMF) { // If there are no NANs to deal with, then these are reduced to a // single MIN** or MAX** instruction instead of the MIN/CMP/SELECT that we @@ -3003,7 +3004,7 @@ if (ISD != ISD::DELETED_NODE) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, RetTy); + const auto LT = TLI->getTypeLegalizationCost(DL, RetTy); MVT MTy = LT.second; // Attempt to lookup cost. @@ -3042,22 +3043,22 @@ if (Index != -1U && (Opcode == Instruction::ExtractElement || Opcode == Instruction::InsertElement)) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Val); + MVT MTy = TLI->getTypeLegalizationCost(DL, Val).second; // This type is legalized to a scalar type. - if (!LT.second.isVector()) + if (!MTy.isVector()) return 0; // The type may be split. Normalize the index to the new type. - unsigned NumElts = LT.second.getVectorNumElements(); + unsigned NumElts = MTy.getVectorNumElements(); unsigned SubNumElts = NumElts; Index = Index % NumElts; // For >128-bit vectors, we need to extract higher 128-bit subvectors. // For inserts, we also need to insert the subvector back. - if (LT.second.getSizeInBits() > 128) { - assert((LT.second.getSizeInBits() % 128) == 0 && "Illegal vector"); - unsigned NumSubVecs = LT.second.getSizeInBits() / 128; + if (MTy.getSizeInBits() > 128) { + assert((MTy.getSizeInBits() % 128) == 0 && "Illegal vector"); + unsigned NumSubVecs = MTy.getSizeInBits() / 128; SubNumElts = NumElts / NumSubVecs; if (SubNumElts <= Index) { RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1); @@ -3079,7 +3080,7 @@ int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Unexpected vector opcode"); - MVT MScalarTy = LT.second.getScalarType(); + MVT MScalarTy = MTy.getScalarType(); if (ST->isSLM()) if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy)) return Entry->Cost + RegisterFileMoveCost; @@ -3130,7 +3131,7 @@ // For insertions, a ISD::BUILD_VECTOR style vector initialization can be much // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT. if (Insert) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); MVT MScalarTy = LT.second.getScalarType(); if ((MScalarTy == MVT::i16 && ST->hasSSE2()) || @@ -3154,8 +3155,9 @@ // Case#2: inserting into 5th index needs extracti128 + vpinsrd + // inserti128. // Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128. - unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * LT.first; - unsigned NumElts = LT.second.getVectorNumElements() * LT.first; + const InstructionCost::CostType CostValue = *LT.first.getValue(); + unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * CostValue; + unsigned NumElts = LT.second.getVectorNumElements() * CostValue; APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts); unsigned Scale = NumElts / Num128Lanes; // We iterate each 128-lane, and check if we need a @@ -3246,8 +3248,7 @@ Cost += getMemoryOpCost(Opcode, SubTy, Alignment, AddressSpace, CostKind); - std::pair LST = TLI->getTypeLegalizationCost(DL, SubTy); - if (!LST.second.isVector()) { + if (!TLI->getTypeLegalizationCost(DL, SubTy).second.isVector()) { APInt DemandedElts = APInt::getBitsSet(NumElem, NumElemDone, NumElemDone + Factor); Cost += getScalarizationOverhead(VTy, DemandedElts, @@ -3264,10 +3265,10 @@ } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); + const auto LT = TLI->getTypeLegalizationCost(DL, Src); // Each load/store unit costs 1. - int Cost = LT.first * 1; + InstructionCost Cost = LT.first * 1; // This isn't exactly right. We're using slow unaligned 32-byte accesses as a // proxy for a double-pumped AVX memory interface such as on Sandybridge. @@ -3313,7 +3314,7 @@ } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, SrcVTy); + const auto LT = TLI->getTypeLegalizationCost(DL, SrcVTy); auto VT = TLI->getValueType(DL, SrcVTy); InstructionCost Cost = 0; if (VT.isSimple() && LT.second != VT.getSimpleVT() && @@ -3428,7 +3429,7 @@ return Entry->Cost; } - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + const auto LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -3618,7 +3619,7 @@ InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + const auto LT = TLI->getTypeLegalizationCost(DL, Ty); MVT MTy = LT.second; @@ -3753,7 +3754,7 @@ return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned, CostKind); - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + const auto LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -4181,9 +4182,10 @@ auto *IndexVTy = FixedVectorType::get( IntegerType::get(SrcVTy->getContext(), IndexSize), VF); - std::pair IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy); - std::pair SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy); - int SplitFactor = std::max(IdxsLT.first, SrcLT.first); + const auto IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy); + const auto SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy); + InstructionCost::CostType SplitFactor = + *std::max(IdxsLT.first, SrcLT.first).getValue(); if (SplitFactor > 1) { // Handle splitting of vector of pointers auto *SplitSrcTy = @@ -4719,7 +4721,7 @@ Indices.size() ? Indices.size() : Factor; auto *ResultTy = FixedVectorType::get(VecTy->getElementType(), VecTy->getNumElements() / Factor); - unsigned NumOfResults = + InstructionCost NumOfResults = getTLI()->getTypeLegalizationCost(DL, ResultTy).first * NumOfLoadsInInterleaveGrp; @@ -4735,7 +4737,7 @@ // The SK_MergeTwoSrc shuffle clobbers one of src operands. // When we have more than one destination, we need additional instructions // to keep sources. - unsigned NumOfMoves = 0; + InstructionCost NumOfMoves = 0; if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc) NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;