diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -357,7 +357,10 @@ } unsigned getRegUsageForType(Type *Ty) { - return getTLI()->getTypeLegalizationCost(DL, Ty).first; + InstructionCost::CostType Val = + *getTLI()->getTypeLegalizationCost(DL, Ty).first.getValue(); + assert(Val >= 0 && "Negative cost!"); + return Val; } InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, @@ -750,7 +753,7 @@ Opd1PropInfo, Opd2PropInfo, Args, CxtI); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); bool IsFloat = Ty->isFPOrFPVectorTy(); // Assume that floating point arithmetic operations cost twice as much as @@ -852,8 +855,10 @@ const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - std::pair SrcLT = TLI->getTypeLegalizationCost(DL, Src); - std::pair DstLT = TLI->getTypeLegalizationCost(DL, Dst); + std::pair SrcLT = + TLI->getTypeLegalizationCost(DL, Src); + std::pair DstLT = + TLI->getTypeLegalizationCost(DL, Dst); TypeSize SrcSize = SrcLT.second.getSizeInBits(); TypeSize DstSize = DstLT.second.getSizeInBits(); @@ -1025,7 +1030,8 @@ if (CondTy->isVectorTy()) ISD = ISD::VSELECT; } - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = + TLI->getTypeLegalizationCost(DL, ValTy); if (!(ValTy->isVectorTy() && !LT.second.isVector()) && !TLI->isOperationExpand(ISD, LT.second)) { @@ -1055,7 +1061,7 @@ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { - std::pair LT = + std::pair LT = getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); return LT.first; @@ -1069,7 +1075,8 @@ // Assume types, such as structs, are expensive. if (getTLI()->getValueType(DL, Src, true) == MVT::Other) return 4; - std::pair LT = getTLI()->getTypeLegalizationCost(DL, Src); + std::pair LT = + getTLI()->getTypeLegalizationCost(DL, Src); // Assuming that all loads of legal types cost 1. InstructionCost Cost = LT.first; @@ -1836,10 +1843,11 @@ } const TargetLoweringBase *TLI = getTLI(); - std::pair LT = TLI->getTypeLegalizationCost(DL, RetTy); + std::pair LT = + TLI->getTypeLegalizationCost(DL, RetTy); - SmallVector LegalCost; - SmallVector CustomCost; + SmallVector LegalCost; + SmallVector CustomCost; for (unsigned ISD : ISDs) { if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && @@ -1945,8 +1953,9 @@ } unsigned getNumberOfParts(Type *Tp) { - std::pair LT = getTLI()->getTypeLegalizationCost(DL, Tp); - return LT.first; + std::pair LT = + getTLI()->getTypeLegalizationCost(DL, Tp); + return *LT.first.getValue(); } InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, @@ -2013,7 +2022,7 @@ unsigned NumReduxLevels = Log2_32(NumVecElts); InstructionCost ArithCost = 0; InstructionCost ShuffleCost = 0; - std::pair LT = + std::pair LT = thisT()->getTLI()->getTypeLegalizationCost(DL, Ty); unsigned LongVectorCount = 0; unsigned MVTLen = @@ -2069,7 +2078,7 @@ } InstructionCost MinMaxCost = 0; InstructionCost ShuffleCost = 0; - std::pair LT = + std::pair LT = thisT()->getTLI()->getTypeLegalizationCost(DL, Ty); unsigned LongVectorCount = 0; unsigned MVTLen = diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1815,8 +1815,8 @@ int InstructionOpcodeToISD(unsigned Opcode) const; /// Estimate the cost of type-legalization and the legalized type. - std::pair getTypeLegalizationCost(const DataLayout &DL, - Type *Ty) const; + std::pair getTypeLegalizationCost(const DataLayout &DL, + Type *Ty) const; /// @} diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1807,13 +1807,13 @@ llvm_unreachable("Unknown instruction type encountered!"); } -std::pair +std::pair TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const { LLVMContext &C = Ty->getContext(); EVT MTy = getValueType(DL, Ty); - int Cost = 1; + InstructionCost Cost = 1; // We keep legalizing the type until we find a legal kind. We assume that // the only operation that costs anything is the split. After splitting // we need to handle two types. diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -545,8 +545,10 @@ return false; // Get the total number of vector elements in the legalized types. - unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements(); - unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements(); + InstructionCost NumDstEls = + DstTyL.first * DstTyL.second.getVectorMinNumElements(); + InstructionCost NumSrcEls = + SrcTyL.first * SrcTyL.second.getVectorMinNumElements(); // Return true if the legalized types have the same number of vector elements // and the destination element type size is twice that of the source type. @@ -906,7 +908,7 @@ if (Index != -1U) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Val); + std::pair LT = TLI->getTypeLegalizationCost(DL, Val); // This type is legalized to a scalar type. if (!LT.second.isVector()) @@ -938,7 +940,7 @@ Opd2PropInfo, Args, CxtI); // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.), // add in the widening overhead specified by the sub-target. Since the @@ -1536,7 +1538,7 @@ assert((isa(Ty) && isa(CondTy)) && "Both vector needs to be scalable"); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); InstructionCost LegalizationCost = 0; if (LT.first > 1) { Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext()); @@ -1558,7 +1560,7 @@ TTI::TargetCostKind CostKind) { assert(!IsPairwise && "Cannot be pair wise to continue"); - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); InstructionCost LegalizationCost = 0; if (LT.first > 1) { Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext()); @@ -1593,7 +1595,7 @@ return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, CostKind); - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -1694,7 +1696,7 @@ { TTI::SK_Reverse, MVT::nxv4i1, 1 }, { TTI::SK_Reverse, MVT::nxv2i1, 1 }, }; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -531,7 +531,7 @@ int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); bool IsFloat = Ty->isFPOrFPVectorTy(); // Assume that floating point arithmetic operations cost twice as much as @@ -569,7 +569,7 @@ } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); // Because we don't have any legal vector operations, but the legal types, we @@ -775,7 +775,7 @@ } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, RetTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, RetTy); unsigned NElts = LT.second.isVector() ? LT.second.getVectorNumElements() : 1; @@ -857,7 +857,7 @@ OrigTy.getScalarSizeInBits() != 16) return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise, CostKind); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); return LT.first * getFullRateInstrCost(); } @@ -875,7 +875,7 @@ return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned, CostKind); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); return LT.first * getHalfRateInstrCost(CostKind); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -491,8 +491,8 @@ const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const; - std::pair getTypeLegalizationCost(const DataLayout &DL, - Type *Ty) const; + std::pair getTypeLegalizationCost(const DataLayout &DL, + Type *Ty) const; }; } // End namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -12257,10 +12257,11 @@ return hasCFUser(V, Visited, Subtarget->getWavefrontSize()); } -std::pair +std::pair SITargetLowering::getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const { - auto Cost = TargetLoweringBase::getTypeLegalizationCost(DL, Ty); + std::pair Cost = + TargetLoweringBase::getTypeLegalizationCost(DL, Ty); auto Size = DL.getTypeSizeInBits(Ty); // Maximum load or store can handle 8 dwords for scalar and 4 for // vector ALU. Let's assume anything above 8 dwords is expensive diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -559,7 +559,7 @@ {ISD::FP_EXTEND, MVT::v2f32, 2}, {ISD::FP_EXTEND, MVT::v4f32, 4}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); + std::pair LT = TLI->getTypeLegalizationCost(DL, Src); if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second)) return AdjustCost(LT.first * Entry->Cost); } @@ -825,7 +825,7 @@ // Integer cross-lane moves are more expensive than float, which can // sometimes just be vmovs. Integer involve being passes to GPR registers, // causing more of a delay. - std::pair LT = + std::pair LT = getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType()); return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1); } @@ -851,7 +851,7 @@ // - may require one or more conditional mov (including an IT), // - can't operate directly on immediates, // - require live flags, which we can't copy around easily. - int Cost = TLI->getTypeLegalizationCost(DL, ValTy).first; + InstructionCost Cost = TLI->getTypeLegalizationCost(DL, ValTy).first; // Possible IT instruction for Thumb2, or more for Thumb1. ++Cost; @@ -928,7 +928,8 @@ return Entry->Cost; } - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = + TLI->getTypeLegalizationCost(DL, ValTy); return LT.first; } @@ -952,7 +953,8 @@ I); } - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = + TLI->getTypeLegalizationCost(DL, ValTy); int BaseCost = ST->getMVEVectorCostFactor(CostKind); // There are two types - the input that specifies the type of the compare // and the output vXi1 type. Because we don't know how the output will be @@ -1156,8 +1158,7 @@ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second)) return LT.first * Entry->Cost; @@ -1178,8 +1179,7 @@ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) return LT.first * Entry->Cost; @@ -1203,7 +1203,7 @@ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) return LT.first * Entry->Cost; @@ -1219,8 +1219,7 @@ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, {ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE, LT.second)) return LT.first * Entry->Cost * @@ -1228,7 +1227,7 @@ } if (!Mask.empty()) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); if (Mask.size() <= LT.second.getVectorNumElements() && (isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) || isVREVMask(Mask, LT.second, 64))) @@ -1264,7 +1263,7 @@ } } - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); if (ST->hasNEON()) { const unsigned FunctionCallDivCost = 20; @@ -1403,7 +1402,7 @@ cast(Src)->getElementType()->isDoubleTy()) { // Unaligned loads/stores are extremely inefficient. // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr. - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); + std::pair LT = TLI->getTypeLegalizationCost(DL, Src); return LT.first * 4; } @@ -1504,14 +1503,14 @@ unsigned NumElems = VTy->getNumElements(); unsigned EltSize = VTy->getScalarSizeInBits(); - std::pair LT = TLI->getTypeLegalizationCost(DL, DataTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, DataTy); // For now, it is assumed that for the MVE gather instructions the loads are // all effectively serialised. This means the cost is the scalar cost // multiplied by the number of elements being loaded. This is possibly very // conservative, but even so we still end up vectorising loops because the // cost per iteration for many loops is lower than for scalar loops. - unsigned VectorCost = + InstructionCost VectorCost = NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind); // The scalarization cost should be a lot higher. We use the number of vector // elements plus the scalarization overhead. @@ -1598,7 +1597,7 @@ return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, CostKind); - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); static const CostTblEntry CostTblAdd[]{ {ISD::ADD, MVT::v16i8, 1}, @@ -1619,7 +1618,8 @@ EVT ValVT = TLI->getValueType(DL, ValTy); EVT ResVT = TLI->getValueType(DL, ResTy); if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) { - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = + TLI->getTypeLegalizationCost(DL, ValTy); if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) || (LT.second == MVT::v8i16 && ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) || @@ -1654,8 +1654,7 @@ break; Type *VT = ICA.getReturnType(); - std::pair LT = - TLI->getTypeLegalizationCost(DL, VT); + std::pair LT = TLI->getTypeLegalizationCost(DL, VT); if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 || LT.second == MVT::v16i8) { // This is a base cost of 1 for the vqadd, plus 3 extract shifts if we @@ -1675,7 +1674,7 @@ break; Type *VT = ICA.getReturnType(); - std::pair LT = TLI->getTypeLegalizationCost(DL, VT); + std::pair LT = TLI->getTypeLegalizationCost(DL, VT); if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 || LT.second == MVT::v16i8) return LT.first * ST->getMVEVectorCostFactor(CostKind); @@ -1686,7 +1685,7 @@ if (!ST->hasMVEFloatOps()) break; Type *VT = ICA.getReturnType(); - std::pair LT = TLI->getTypeLegalizationCost(DL, VT); + std::pair LT = TLI->getTypeLegalizationCost(DL, VT); if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) return LT.first * ST->getMVEVectorCostFactor(CostKind); break; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -143,7 +143,8 @@ HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { if (ICA.getID() == Intrinsic::bswap) { - std::pair LT = TLI.getTypeLegalizationCost(DL, ICA.getReturnType()); + std::pair LT = + TLI.getTypeLegalizationCost(DL, ICA.getReturnType()); return LT.first + 2; } return BaseT::getIntrinsicInstrCost(ICA, CostKind); @@ -251,7 +252,7 @@ TTI::TargetCostKind CostKind, const Instruction *I) { if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) { - std::pair LT = TLI.getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI.getTypeLegalizationCost(DL, ValTy); if (Opcode == Instruction::FCmp) return LT.first + FloatFactor * getTypeNumElements(ValTy); } @@ -271,7 +272,7 @@ Opd2PropInfo, Args, CxtI); if (Ty->isVectorTy()) { - std::pair LT = TLI.getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI.getTypeLegalizationCost(DL, Ty); if (LT.second.isFloatingPoint()) return LT.first + FloatFactor * getTypeNumElements(Ty); } @@ -288,9 +289,12 @@ unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0; unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0; - std::pair SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy); - std::pair DstLT = TLI.getTypeLegalizationCost(DL, DstTy); - unsigned Cost = std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN); + std::pair SrcLT = + TLI.getTypeLegalizationCost(DL, SrcTy); + std::pair DstLT = + TLI.getTypeLegalizationCost(DL, DstTy); + InstructionCost Cost = + std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN); // TODO: Allow non-throughput costs that aren't binary. if (CostKind != TTI::TCK_RecipThroughput) return Cost == 0 ? 0 : 1; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -375,7 +375,7 @@ TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -328,7 +328,8 @@ if (U->getType()->isVectorTy()) { // Instructions that need to be split should cost more. - std::pair LT = TLI->getTypeLegalizationCost(DL, U->getType()); + std::pair LT = + TLI->getTypeLegalizationCost(DL, U->getType()); return LT.first * BaseT::getUserCost(U, Operands, CostKind); } @@ -946,7 +947,7 @@ if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy()) return Cost; - std::pair LT1 = TLI->getTypeLegalizationCost(DL, Ty1); + std::pair LT1 = TLI->getTypeLegalizationCost(DL, Ty1); // If type legalization involves splitting the vector, we don't want to // double the cost at every step - only the last step. if (LT1.first != 1 || !LT1.second.isVector()) @@ -957,7 +958,7 @@ return Cost; if (Ty2) { - std::pair LT2 = TLI->getTypeLegalizationCost(DL, Ty2); + std::pair LT2 = TLI->getTypeLegalizationCost(DL, Ty2); if (LT2.first != 1 || !LT2.second.isVector()) return Cost; } @@ -988,7 +989,7 @@ ArrayRef Mask, int Index, Type *SubTp) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); // PPC, for both Altivec/VSX, support cheap arbitrary permutations // (at least in the sense that there need only be one non-loop-invariant @@ -1113,7 +1114,7 @@ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); + std::pair LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); @@ -1198,7 +1199,7 @@ "Expect a vector type for interleaved memory op"); // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, VecTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, VecTy); // Firstly, the cost of load/store operation. InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -185,7 +185,7 @@ Op2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -966,7 +966,7 @@ VectorType *SubTp) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are widened to type v4i32. - std::pair LT = TLI->getTypeLegalizationCost(DL, BaseTp); + std::pair LT = TLI->getTypeLegalizationCost(DL, BaseTp); Kind = improveShuffleKindFromMask(Kind, Mask); // Treat Transpose as 2-op shuffles - there's no difference in lowering. @@ -985,7 +985,8 @@ int NumElts = LT.second.getVectorNumElements(); if ((Index % NumElts) == 0) return 0; - std::pair SubLT = TLI->getTypeLegalizationCost(DL, SubTp); + std::pair SubLT = + TLI->getTypeLegalizationCost(DL, SubTp); if (SubLT.second.isVector()) { int NumSubElts = SubLT.second.getVectorNumElements(); if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) @@ -1031,7 +1032,8 @@ // isn't free, because we need to preserve the rest of the wide vector. if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) { int NumElts = LT.second.getVectorNumElements(); - std::pair SubLT = TLI->getTypeLegalizationCost(DL, SubTp); + std::pair SubLT = + TLI->getTypeLegalizationCost(DL, SubTp); if (SubLT.second.isVector()) { int NumSubElts = SubLT.second.getVectorNumElements(); if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) @@ -1091,12 +1093,12 @@ // Number of source vectors after legalization: unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize; // Number of destination vectors after legalization: - unsigned NumOfDests = LT.first; + InstructionCost NumOfDests = LT.first; auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(), LegalVT.getVectorNumElements()); - unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests; + InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests; return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, None, 0, nullptr); } @@ -1107,8 +1109,8 @@ // For 2-input shuffles, we must account for splitting the 2 inputs into many. if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) { // We assume that source and destination have the same vector type. - int NumOfDests = LT.first; - int NumOfShufflesPerDest = LT.first * 2 - 1; + InstructionCost NumOfDests = LT.first; + InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1; LT.first = NumOfDests * NumOfShufflesPerDest; } @@ -2024,8 +2026,9 @@ { ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1 }, // PSHUFD }; - std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); - std::pair LTDest = TLI->getTypeLegalizationCost(DL, Dst); + std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); + std::pair LTDest = + TLI->getTypeLegalizationCost(DL, Dst); if (ST->hasSSE2() && !ST->hasAVX()) { if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, @@ -2114,7 +2117,7 @@ I); // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -2804,7 +2807,7 @@ if (ISD != ISD::DELETED_NODE) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, OpTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, OpTy); MVT MTy = LT.second; // Attempt to lookup cost. @@ -2824,7 +2827,8 @@ return LT.first * Cost; } - auto adjustTableCost = [](const CostTblEntry &Entry, int LegalizationCost, + auto adjustTableCost = [](const CostTblEntry &Entry, + InstructionCost LegalizationCost, FastMathFlags FMF) { // If there are no NANs to deal with, then these are reduced to a // single MIN** or MAX** instruction instead of the MIN/CMP/SELECT that we @@ -3006,7 +3010,8 @@ if (ISD != ISD::DELETED_NODE) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, RetTy); + std::pair LT = + TLI->getTypeLegalizationCost(DL, RetTy); MVT MTy = LT.second; // Attempt to lookup cost. @@ -3045,7 +3050,7 @@ if (Index != -1U && (Opcode == Instruction::ExtractElement || Opcode == Instruction::InsertElement)) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Val); + std::pair LT = TLI->getTypeLegalizationCost(DL, Val); // This type is legalized to a scalar type. if (!LT.second.isVector()) @@ -3133,7 +3138,7 @@ // For insertions, a ISD::BUILD_VECTOR style vector initialization can be much // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT. if (Insert) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); MVT MScalarTy = LT.second.getScalarType(); if ((MScalarTy == MVT::i16 && ST->hasSSE2()) || @@ -3157,8 +3162,10 @@ // Case#2: inserting into 5th index needs extracti128 + vpinsrd + // inserti128. // Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128. - unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * LT.first; - unsigned NumElts = LT.second.getVectorNumElements() * LT.first; + const int CostValue = *LT.first.getValue(); + assert(CostValue >= 0 && "Negative cost!"); + unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * CostValue; + unsigned NumElts = LT.second.getVectorNumElements() * CostValue; APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts); unsigned Scale = NumElts / Num128Lanes; // We iterate each 128-lane, and check if we need a @@ -3249,7 +3256,8 @@ Cost += getMemoryOpCost(Opcode, SubTy, Alignment, AddressSpace, CostKind); - std::pair LST = TLI->getTypeLegalizationCost(DL, SubTy); + std::pair LST = + TLI->getTypeLegalizationCost(DL, SubTy); if (!LST.second.isVector()) { APInt DemandedElts = APInt::getBitsSet(NumElem, NumElemDone, NumElemDone + Factor); @@ -3267,10 +3275,10 @@ } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); + std::pair LT = TLI->getTypeLegalizationCost(DL, Src); // Each load/store unit costs 1. - int Cost = LT.first * 1; + InstructionCost Cost = LT.first * 1; // This isn't exactly right. We're using slow unaligned 32-byte accesses as a // proxy for a double-pumped AVX memory interface such as on Sandybridge. @@ -3316,7 +3324,7 @@ } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, SrcVTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, SrcVTy); auto VT = TLI->getValueType(DL, SrcVTy); InstructionCost Cost = 0; if (VT.isSimple() && LT.second != VT.getSimpleVT() && @@ -3431,7 +3439,7 @@ return Entry->Cost; } - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -3621,7 +3629,7 @@ InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); MVT MTy = LT.second; @@ -3756,7 +3764,7 @@ return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned, CostKind); - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -4184,9 +4192,12 @@ auto *IndexVTy = FixedVectorType::get( IntegerType::get(SrcVTy->getContext(), IndexSize), VF); - std::pair IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy); - std::pair SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy); - int SplitFactor = std::max(IdxsLT.first, SrcLT.first); + std::pair IdxsLT = + TLI->getTypeLegalizationCost(DL, IndexVTy); + std::pair SrcLT = + TLI->getTypeLegalizationCost(DL, SrcVTy); + InstructionCost::CostType SplitFactor = + *std::max(IdxsLT.first, SrcLT.first).getValue(); if (SplitFactor > 1) { // Handle splitting of vector of pointers auto *SplitSrcTy = @@ -4722,7 +4733,7 @@ Indices.size() ? Indices.size() : Factor; auto *ResultTy = FixedVectorType::get(VecTy->getElementType(), VecTy->getNumElements() / Factor); - unsigned NumOfResults = + InstructionCost NumOfResults = getTLI()->getTypeLegalizationCost(DL, ResultTy).first * NumOfLoadsInInterleaveGrp; @@ -4738,7 +4749,7 @@ // The SK_MergeTwoSrc shuffle clobbers one of src operands. // When we have more than one destination, we need additional instructions // to keep sources. - unsigned NumOfMoves = 0; + InstructionCost NumOfMoves = 0; if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc) NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;