diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -78,18 +78,19 @@
   using BaseT = TargetTransformInfoImplCRTPBase<T>;
   using TTI = TargetTransformInfo;
 
+  /// Helper function to access this as a T.
+  T *thisT() { return static_cast<T *>(this); }
+
   /// Estimate a cost of Broadcast as an extract and sequence of insert
   /// operations.
   unsigned getBroadcastShuffleOverhead(FixedVectorType *VTy) {
     unsigned Cost = 0;
     // Broadcast cost is equal to the cost of extracting the zero'th element
     // plus the cost of inserting it into every element of the result vector.
-    Cost += static_cast<T *>(this)->getVectorInstrCost(
-        Instruction::ExtractElement, VTy, 0);
+    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0);
     for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
-      Cost += static_cast<T *>(this)->getVectorInstrCost(
-          Instruction::InsertElement, VTy, i);
+      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
     }
     return Cost;
   }
@@ -106,10 +107,8 @@
     // vector and finally index 3 of second vector and insert them at index
     // <0,1,2,3> of result vector.
     for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
-      Cost += static_cast<T *>(this)->getVectorInstrCost(
-          Instruction::InsertElement, VTy, i);
-      Cost += static_cast<T *>(this)->getVectorInstrCost(
-          Instruction::ExtractElement, VTy, i);
+      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
+      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i);
     }
     return Cost;
   }
@@ -129,10 +128,10 @@
     // the source type plus the cost of inserting them into the result vector
     // type.
     for (int i = 0; i != NumSubElts; ++i) {
-      Cost += static_cast<T *>(this)->getVectorInstrCost(
-          Instruction::ExtractElement, VTy, i + Index);
-      Cost += static_cast<T *>(this)->getVectorInstrCost(
-          Instruction::InsertElement, SubVTy, i);
+      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
+                                          i + Index);
+      Cost +=
+          thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i);
     }
     return Cost;
   }
@@ -152,10 +151,10 @@
     // the source type plus the cost of inserting them into the result vector
     // type.
     for (int i = 0; i != NumSubElts; ++i) {
-      Cost += static_cast<T *>(this)->getVectorInstrCost(
-          Instruction::ExtractElement, SubVTy, i);
-      Cost += static_cast<T *>(this)->getVectorInstrCost(
-          Instruction::InsertElement, VTy, i + Index);
+      Cost +=
+          thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i);
+      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
+                                          i + Index);
     }
     return Cost;
   }
@@ -429,7 +428,7 @@
       for (Instruction &I : *BB) {
         if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
           if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
-            if (!static_cast<T *>(this)->isLoweredToCall(F))
+            if (!thisT()->isLoweredToCall(F))
               continue;
           }
 
@@ -543,11 +542,9 @@
       if (!DemandedElts[i])
         continue;
       if (Insert)
-        Cost += static_cast<T *>(this)->getVectorInstrCost(
-            Instruction::InsertElement, Ty, i);
+        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i);
       if (Extract)
-        Cost += static_cast<T *>(this)->getVectorInstrCost(
-            Instruction::ExtractElement, Ty, i);
+        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
     }
 
     return Cost;
@@ -559,8 +556,7 @@
     auto *Ty = cast<FixedVectorType>(InTy);
 
     APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements());
-    return static_cast<T *>(this)->getScalarizationOverhead(Ty, DemandedElts,
-                                                            Insert, Extract);
+    return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
   }
 
   /// Estimate the overhead of scalarizing an instructions unique
@@ -653,7 +649,7 @@
     // similarly to what getCastInstrCost() does.
     if (auto *VTy = dyn_cast<VectorType>(Ty)) {
       unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
-      unsigned Cost = static_cast<T *>(this)->getArithmeticInstrCost(
+      unsigned Cost = thisT()->getArithmeticInstrCost(
           Opcode, VTy->getScalarType(), CostKind);
       // Return the cost of multiple scalar invocation plus the cost of
       // inserting and extracting the values.
@@ -818,9 +814,8 @@
     // In other cases where the source or destination are illegal, assume
     // the operation will get scalarized.
     unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
-    unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
-        Opcode, Dst->getScalarType(), Src->getScalarType(),
-        CostKind, I);
+    unsigned Cost = thisT()->getCastInstrCost(
+        Opcode, Dst->getScalarType(), Src->getScalarType(), CostKind, I);
 
     // Return the cost of multiple scalar invocation plus the cost of
     // inserting and extracting the values.
@@ -842,11 +837,10 @@
 
   unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                     VectorType *VecTy, unsigned Index) {
-    return static_cast<T *>(this)->getVectorInstrCost(
-               Instruction::ExtractElement, VecTy, Index) +
-           static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
-                                                    VecTy->getElementType(),
-                                                    TTI::TCK_RecipThroughput);
+    return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
+                                       Index) +
+           thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
+                                     TTI::TCK_RecipThroughput);
   }
 
   unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
@@ -886,7 +880,7 @@
       unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
       if (CondTy)
         CondTy = CondTy->getScalarType();
-      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
+      unsigned Cost = thisT()->getCmpSelInstrCost(
           Opcode, ValVTy->getScalarType(), CondTy, CostKind, I);
 
       // Return the cost of multiple scalar invocation plus the cost of
@@ -959,17 +953,16 @@
     // Firstly, the cost of load/store operation.
     unsigned Cost;
     if (UseMaskForCond || UseMaskForGaps)
-      Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
-          Opcode, VecTy, Alignment, AddressSpace, CostKind);
+      Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
+                                            AddressSpace, CostKind);
     else
-      Cost = static_cast<T *>(this)->getMemoryOpCost(
-          Opcode, VecTy, Alignment, AddressSpace, CostKind);
+      Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
+                                      CostKind);
 
     // Legalize the vector type, and get the legalized and unlegalized type
     // sizes.
     MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
-    unsigned VecTySize =
-        static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
+    unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
     unsigned VecTyLTSize = VecTyLT.getStoreSize();
 
     // Return the ceiling of dividing A by B.
@@ -1028,14 +1021,14 @@
 
         // Extract elements from loaded vector for each sub vector.
         for (unsigned i = 0; i < NumSubElts; i++)
-          Cost += static_cast<T *>(this)->getVectorInstrCost(
-              Instruction::ExtractElement, VT, Index + i * Factor);
+          Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT,
+                                              Index + i * Factor);
       }
 
       unsigned InsSubCost = 0;
       for (unsigned i = 0; i < NumSubElts; i++)
-        InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
-            Instruction::InsertElement, SubVT, i);
+        InsSubCost +=
+            thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, i);
 
       Cost += Indices.size() * InsSubCost;
     } else {
@@ -1050,8 +1043,8 @@
 
       unsigned ExtSubCost = 0;
       for (unsigned i = 0; i < NumSubElts; i++)
-        ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
-            Instruction::ExtractElement, SubVT, i);
+        ExtSubCost +=
+            thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
       Cost += ExtSubCost * Factor;
 
       for (unsigned i = 0; i < NumElts; i++)
@@ -1077,12 +1070,12 @@
     // vector and insert them factor times into the <24xi1> shuffled mask
     // vector.
     for (unsigned i = 0; i < NumSubElts; i++)
-      Cost += static_cast<T *>(this)->getVectorInstrCost(
-          Instruction::ExtractElement, SubVT, i);
+      Cost +=
+          thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
 
     for (unsigned i = 0; i < NumElts; i++)
-      Cost += static_cast<T *>(this)->getVectorInstrCost(
-          Instruction::InsertElement, MaskVT, i);
+      Cost +=
+          thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i);
 
     // The Gaps mask is invariant and created outside the loop, therefore the
     // cost of creating it is not accounted for here. However if we have both
@@ -1090,8 +1083,8 @@
     // memory access, we need to account for the cost of And-ing the two masks
     // inside the loop.
     if (UseMaskForGaps)
-      Cost += static_cast<T *>(this)->getArithmeticInstrCost(
-          BinaryOperator::And, MaskVT, CostKind);
+      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
+                                              CostKind);
 
     return Cost;
   }
@@ -1100,7 +1093,6 @@
   unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) {
     Intrinsic::ID IID = ICA.getID();
-    auto *ConcreteTTI = static_cast<T *>(this);
 
     // Special case some scalar intrinsics.
     if (CostKind != TTI::TCK_RecipThroughput) {
@@ -1116,8 +1108,8 @@
           return TargetTransformInfo::TCC_Basic;
         break;
       case Intrinsic::memcpy:
-        return ConcreteTTI->getMemcpyCost(ICA.getInst());
-        // TODO: other libc intrinsics.
+        return thisT()->getMemcpyCost(ICA.getInst());
+      // TODO: other libc intrinsics.
       }
       return BaseT::getIntrinsicInstrCost(ICA, CostKind);
     }
@@ -1166,25 +1158,24 @@
       IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF,
                                     ScalarizationCost, I);
-      return ConcreteTTI->getIntrinsicInstrCost(Attrs, CostKind);
+      return thisT()->getIntrinsicInstrCost(Attrs, CostKind);
     }
     case Intrinsic::masked_scatter: {
       assert(VF == 1 && "Can't vectorize types here.");
       const Value *Mask = Args[3];
       bool VarMask = !isa<Constant>(Mask);
       Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
-      return ConcreteTTI->getGatherScatterOpCost(Instruction::Store,
-                                                 Args[0]->getType(), Args[1],
-                                                 VarMask, Alignment, CostKind,
-                                                 I);
+      return thisT()->getGatherScatterOpCost(Instruction::Store,
+                                             Args[0]->getType(), Args[1],
+                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::masked_gather: {
      assert(VF == 1 && "Can't vectorize types here.");
      const Value *Mask = Args[2];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
-      return ConcreteTTI->getGatherScatterOpCost(
-          Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I);
+      return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
+                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::experimental_vector_reduce_add:
    case Intrinsic::experimental_vector_reduce_mul:
@@ -1217,30 +1208,27 @@
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
      unsigned Cost = 0;
-      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy,
-                                                  CostKind);
-      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy,
-                                                  CostKind);
-      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
-                                                  CostKind,
-                                                  OpKindX, OpKindZ, OpPropsX);
-      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
-                                                  CostKind,
-                                                  OpKindY, OpKindZ, OpPropsY);
+      Cost +=
+          thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
+      Cost +=
+          thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
+      Cost += thisT()->getArithmeticInstrCost(
+          BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX);
+      Cost += thisT()->getArithmeticInstrCost(
+          BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY);
      // Non-constant shift amounts requires a modulo.
      if (OpKindZ != TTI::OK_UniformConstantValue &&
          OpKindZ != TTI::OK_NonUniformConstantValue)
-        Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
-                                                    CostKind,
-                                                    OpKindZ, OpKindBW, OpPropsZ,
-                                                    OpPropsBW);
+        Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
+                                                CostKind, OpKindZ, OpKindBW,
+                                                OpPropsZ, OpPropsBW);
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
      if (X != Y) {
        Type *CondTy = RetTy->getWithNewBitWidth(1);
-        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
-                                                CondTy, CostKind);
-        Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                                CondTy, CostKind);
+        Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+                                            CostKind);
+        Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
+                                            CondTy, CostKind);
      }
      return Cost;
    }
@@ -1253,9 +1241,6 @@
   /// based on types.
   unsigned getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                           TTI::TargetCostKind CostKind) {
-
-    auto *ConcreteTTI = static_cast<T *>(this);
-
     Intrinsic::ID IID = ICA.getID();
     Type *RetTy = ICA.getReturnType();
     const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
@@ -1297,7 +1282,7 @@
 
       IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
       unsigned ScalarCost =
-          ConcreteTTI->getIntrinsicInstrCost(ScalarAttrs, CostKind);
+          thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);
 
       return ScalarCalls * ScalarCost + ScalarizationCost;
     }
@@ -1386,57 +1371,57 @@
      return 0;
    case Intrinsic::masked_store: {
      Type *Ty = Tys[0];
-      Align TyAlign = ConcreteTTI->DL.getABITypeAlign(Ty);
-      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign,
-                                                0, CostKind);
+      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
+      return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
+                                            CostKind);
    }
    case Intrinsic::masked_load: {
      Type *Ty = RetTy;
-      Align TyAlign = ConcreteTTI->DL.getABITypeAlign(Ty);
-      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign,
-                                                0, CostKind);
+      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
+      return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
+                                            CostKind);
    }
    case Intrinsic::experimental_vector_reduce_add:
-      return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, VecOpTy,
-                                                     /*IsPairwiseForm=*/false,
-                                                     CostKind);
+      return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
+                                                 /*IsPairwiseForm=*/false,
+                                                 CostKind);
    case Intrinsic::experimental_vector_reduce_mul:
-      return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
-                                                     /*IsPairwiseForm=*/false,
-                                                     CostKind);
+      return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
-                                                 /*IsPairwiseForm=*/false,
+                                                 /*IsPairwiseForm=*/false,
+                                                 CostKind);
    case Intrinsic::experimental_vector_reduce_and:
-      return ConcreteTTI->getArithmeticReductionCost(Instruction::And, VecOpTy,
-                                                     /*IsPairwiseForm=*/false,
-                                                     CostKind);
+      return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
+                                                 /*IsPairwiseForm=*/false,
+                                                 CostKind);
    case Intrinsic::experimental_vector_reduce_or:
-      return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, VecOpTy,
-                                                     /*IsPairwiseForm=*/false,
-                                                     CostKind);
+      return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy,
+                                                 /*IsPairwiseForm=*/false,
+                                                 CostKind);
    case Intrinsic::experimental_vector_reduce_xor:
-      return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
-                                                     /*IsPairwiseForm=*/false,
-                                                     CostKind);
+      return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
+                                                 /*IsPairwiseForm=*/false,
+                                                 CostKind);
    case Intrinsic::experimental_vector_reduce_v2_fadd:
      // FIXME: Add new flag for cost of strict reductions.
-      return ConcreteTTI->getArithmeticReductionCost(
-          Instruction::FAdd, VecOpTy,
-          /*IsPairwiseForm=*/false, CostKind);
+      return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
+                                                 /*IsPairwiseForm=*/false,
+                                                 CostKind);
    case Intrinsic::experimental_vector_reduce_v2_fmul:
      // FIXME: Add new flag for cost of strict reductions.
-      return ConcreteTTI->getArithmeticReductionCost(
-          Instruction::FMul, VecOpTy,
-          /*IsPairwiseForm=*/false, CostKind);
+      return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
+                                                 /*IsPairwiseForm=*/false,
+                                                 CostKind);
    case Intrinsic::experimental_vector_reduce_smax:
    case Intrinsic::experimental_vector_reduce_smin:
    case Intrinsic::experimental_vector_reduce_fmax:
    case Intrinsic::experimental_vector_reduce_fmin:
-      return ConcreteTTI->getMinMaxReductionCost(
+      return thisT()->getMinMaxReductionCost(
          VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
          /*IsPairwiseForm=*/false,
          /*IsUnsigned=*/false, CostKind);
    case Intrinsic::experimental_vector_reduce_umax:
    case Intrinsic::experimental_vector_reduce_umin:
-      return ConcreteTTI->getMinMaxReductionCost(
+      return thisT()->getMinMaxReductionCost(
          VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
          /*IsPairwiseForm=*/false,
          /*IsUnsigned=*/true, CostKind);
@@ -1454,11 +1439,11 @@
      unsigned Cost = 0;
      IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
                                    ScalarizationCostPassed);
-      Cost += ConcreteTTI->getIntrinsicInstrCost(Attrs, CostKind);
-      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
+      Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
+      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+                                          CostKind);
+      Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
                                              CondTy, CostKind);
-      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                                  CondTy, CostKind);
      return Cost;
    }
    case Intrinsic::uadd_sat:
@@ -1473,9 +1458,9 @@
      unsigned Cost = 0;
      IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
                                    ScalarizationCostPassed);
-      Cost += ConcreteTTI->getIntrinsicInstrCost(Attrs, CostKind);
-      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                              CondTy, CostKind);
+      Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
+      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+                                          CostKind);
      return Cost;
    }
    case Intrinsic::smul_fix:
@@ -1487,22 +1472,18 @@
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
 
      unsigned Cost = 0;
-      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy, CostKind);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy,
-                                                  CostKind);
+      Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CostKind);
      Cost +=
-          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+          thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+      Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
                                            CostKind);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
-                                                  CostKind,
-                                                  TTI::OK_AnyValue,
-                                                  TTI::OK_UniformConstantValue);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
-                                                  CostKind,
-                                                  TTI::OK_AnyValue,
-                                                  TTI::OK_UniformConstantValue);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy,
-                                                  CostKind);
+      Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
+                                              CostKind, TTI::OK_AnyValue,
+                                              TTI::OK_UniformConstantValue);
+      Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind,
+                                              TTI::OK_AnyValue,
+                                              TTI::OK_UniformConstantValue);
+      Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
      return Cost;
    }
    case Intrinsic::sadd_with_overflow:
@@ -1522,13 +1503,12 @@
      //   Sub:
      //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
      unsigned Cost = 0;
-      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy, CostKind);
-      Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
-                                                  OverflowTy, CostKind);
-      Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
-                      BinaryOperator::ICmp, OverflowTy, OverflowTy, CostKind);
-      Cost +=
-          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
+      Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
+      Cost += 3 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
+                                              OverflowTy, CostKind);
+      Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, OverflowTy,
+                                              OverflowTy, CostKind);
+      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
                                              CostKind);
      return Cost;
    }
@@ -1541,9 +1521,9 @@
                            : BinaryOperator::Sub;
 
      unsigned Cost = 0;
-      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy, CostKind);
-      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
-                                              OverflowTy, CostKind);
+      Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
+      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
+                                          OverflowTy, CostKind);
      return Cost;
    }
    case Intrinsic::smul_with_overflow:
@@ -1557,24 +1537,22 @@
          IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
 
      unsigned Cost = 0;
-      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy, CostKind);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy,
-                                                  CostKind);
+      Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CostKind);
      Cost +=
-          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+          thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+      Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
                                            CostKind);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
-                                                  CostKind,
-                                                  TTI::OK_AnyValue,
-                                                  TTI::OK_UniformConstantValue);
+      Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy,
+                                              CostKind, TTI::OK_AnyValue,
+                                              TTI::OK_UniformConstantValue);
 
      if (IID == Intrinsic::smul_with_overflow)
-        Cost += ConcreteTTI->getArithmeticInstrCost(
-            Instruction::AShr, MulTy, CostKind, TTI::OK_AnyValue,
-            TTI::OK_UniformConstantValue);
+        Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
+                                                CostKind, TTI::OK_AnyValue,
+                                                TTI::OK_UniformConstantValue);
 
-      Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
-                                              OverflowTy, CostKind);
+      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
+                                          OverflowTy, CostKind);
      return Cost;
    }
    case Intrinsic::ctpop:
@@ -1631,17 +1609,17 @@
    // If we can't lower fmuladd into an FMA estimate the cost as a floating
    // point mul followed by an add.
    if (IID == Intrinsic::fmuladd)
-      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
-                                                 CostKind) +
-             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
-                                                 CostKind);
+      return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
+                                             CostKind) +
+             thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
+                                             CostKind);
    if (IID == Intrinsic::experimental_constrained_fmuladd) {
      IntrinsicCostAttributes FMulAttrs(
        Intrinsic::experimental_constrained_fmul, RetTy, Tys);
      IntrinsicCostAttributes FAddAttrs(
        Intrinsic::experimental_constrained_fadd, RetTy, Tys);
-      return ConcreteTTI->getIntrinsicInstrCost(FMulAttrs, CostKind) +
-             ConcreteTTI->getIntrinsicInstrCost(FAddAttrs, CostKind);
+      return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
+             thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
    }
 
    // Else, assume that we need to scalarize this intrinsic. For math builtins
@@ -1660,7 +1638,7 @@
      ScalarTys.push_back(Ty);
    }
    IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
-    unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(Attrs, CostKind);
+    unsigned ScalarCost = thisT()->getIntrinsicInstrCost(Attrs, CostKind);
    for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
      if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
        if (!ICA.skipScalarizationCost())
@@ -1745,9 +1723,8 @@
    unsigned NumReduxLevels = Log2_32(NumVecElts);
    unsigned ArithCost = 0;
    unsigned ShuffleCost = 0;
-    auto *ConcreteTTI = static_cast<T *>(this);
    std::pair<unsigned, MVT> LT =
-        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
+        thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
@@ -1755,10 +1732,10 @@
      NumVecElts /= 2;
      VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
      // Assume the pairwise shuffles add a cost.
-      ShuffleCost += (IsPairwise + 1) *
-                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
-                                                 NumVecElts, SubTy);
-      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy, CostKind);
+      ShuffleCost +=
+          (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
+                                                     Ty, NumVecElts, SubTy);
+      ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
      Ty = SubTy;
      ++LongVectorCount;
    }
@@ -1777,12 +1754,10 @@
    if (IsPairwise && NumReduxLevels >= 1)
      NumShuffles += NumReduxLevels - 1;
    ShuffleCost += NumShuffles *
-                   ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
-                                               0, Ty);
-    ArithCost += NumReduxLevels *
-                 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
+                   thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty);
+    ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty);
    return ShuffleCost + ArithCost +
-           ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
+           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
  }
 
  /// Try to calculate op costs for min/max reduction operations.
@@ -1804,9 +1779,8 @@
    }
    unsigned MinMaxCost = 0;
    unsigned ShuffleCost = 0;
-    auto *ConcreteTTI = static_cast<T *>(this);
    std::pair<unsigned, MVT> LT =
-        ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
+        thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
@@ -1816,13 +1790,13 @@
      CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
 
      // Assume the pairwise shuffles add a cost.
-      ShuffleCost += (IsPairwise + 1) *
-                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
-                                                 NumVecElts, SubTy);
+      ShuffleCost +=
+          (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
+                                                     Ty, NumVecElts, SubTy);
      MinMaxCost +=
-          ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) +
-          ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
-                                          CostKind);
+          thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) +
+          thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
+                                      CostKind);
      Ty = SubTy;
      ++LongVectorCount;
    }
@@ -1841,17 +1815,16 @@
    if (IsPairwise && NumReduxLevels >= 1)
      NumShuffles += NumReduxLevels - 1;
    ShuffleCost += NumShuffles *
-                   ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
-                                               0, Ty);
+                   thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty);
    MinMaxCost +=
        NumReduxLevels *
-        (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
-         ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
-                                         CostKind));
+        (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
+         thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
+                                     CostKind));
    // The last min/max should be in vector registers and we counted it above.
    // So just need a single extractelement.
    return ShuffleCost + MinMaxCost +
-           ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
+           thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
  }
 
  unsigned getVectorSplitCost() { return 1; }
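Note (not part of the patch): the change above is a mechanical refactor -- every static_cast<T *>(this) inside the CRTP base class is funneled through the new thisT() helper, so calls still statically dispatch to the concrete target's override when one exists. A minimal stand-alone sketch of that pattern follows; the names CostModelBase, MyTargetCostModel, getOpCost and getVectorOpCost are hypothetical stand-ins, not the real TTI interfaces.

// thisT_sketch.cpp -- illustrative only.
#include <iostream>

// CRTP base: T is the most-derived type, so calls routed through thisT()
// resolve at compile time to T's version of a hook when T provides one.
template <typename T> class CostModelBase {
protected:
  // Same idea as the thisT() helper the patch introduces in BasicTTIImplBase.
  T *thisT() { return static_cast<T *>(this); }

public:
  // Default per-element cost; a "target" may shadow this in T.
  unsigned getOpCost(unsigned Idx) { return Idx; }

  // Composite query built on thisT()->getOpCost(), mirroring how the base
  // class composes vector costs out of per-element costs.
  unsigned getVectorOpCost(unsigned NumElts) {
    unsigned Cost = 0;
    for (unsigned I = 0; I != NumElts; ++I)
      Cost += thisT()->getOpCost(I);
    return Cost;
  }
};

// Hypothetical target that overrides the per-element hook.
class MyTargetCostModel : public CostModelBase<MyTargetCostModel> {
public:
  unsigned getOpCost(unsigned) { return 2; } // flat cost per element
};

int main() {
  MyTargetCostModel TM;
  // Uses MyTargetCostModel::getOpCost: 4 elements * 2 = 8, not 0+1+2+3 = 6.
  std::cout << TM.getVectorOpCost(4) << "\n";
}

The helper buys nothing semantically; it just replaces the repeated static_cast<T *>(this) boilerplate with one short, named accessor, which is why the diff touches so many call sites without changing any cost computation.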