@@ -3542,10 +3542,7 @@ static Value *addFastMathFlag(Value *V) {
35423542/// \brief Estimate the overhead of scalarizing a value based on its type.
35433543/// Insert and Extract are set if the result needs to be inserted and/or
35443544/// extracted from vectors.
3545- /// If the instruction is also to be predicated, add the cost of a PHI
3546- /// node to the insertion cost.
35473545static unsigned getScalarizationOverhead (Type *Ty, bool Insert, bool Extract,
3548- bool Predicated,
35493546 const TargetTransformInfo &TTI) {
35503547 if (Ty->isVoidTy ())
35513548 return 0 ;
@@ -3556,41 +3553,30 @@ static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
35563553 for (unsigned I = 0 , E = Ty->getVectorNumElements (); I < E; ++I) {
35573554 if (Extract)
35583555 Cost += TTI.getVectorInstrCost (Instruction::ExtractElement, Ty, I);
3559- if (Insert) {
3556+ if (Insert)
35603557 Cost += TTI.getVectorInstrCost (Instruction::InsertElement, Ty, I);
3561- if (Predicated)
3562- Cost += TTI.getCFInstrCost (Instruction::PHI);
3563- }
35643558 }
35653559
3566- // If we have a predicated instruction, it may not be executed for each
3567- // vector lane. Scale the cost by the probability of executing the
3568- // predicated block.
3569- if (Predicated)
3570- Cost /= getReciprocalPredBlockProb ();
3571-
35723560 return Cost;
35733561}
35743562
35753563/// \brief Estimate the overhead of scalarizing an Instruction based on the
35763564/// types of its operands and return value.
35773565static unsigned getScalarizationOverhead (SmallVectorImpl<Type *> &OpTys,
3578- Type *RetTy, bool Predicated,
3566+ Type *RetTy,
35793567 const TargetTransformInfo &TTI) {
35803568 unsigned ScalarizationCost =
3581- getScalarizationOverhead (RetTy, true , false , Predicated, TTI);
3569+ getScalarizationOverhead (RetTy, true , false , TTI);
35823570
35833571 for (Type *Ty : OpTys)
3584- ScalarizationCost +=
3585- getScalarizationOverhead (Ty, false , true , Predicated, TTI);
3572+ ScalarizationCost += getScalarizationOverhead (Ty, false , true , TTI);
35863573
35873574 return ScalarizationCost;
35883575}
35893576
35903577/// \brief Estimate the overhead of scalarizing an instruction. This is a
35913578/// convenience wrapper for the type-based getScalarizationOverhead API.
35923579static unsigned getScalarizationOverhead (Instruction *I, unsigned VF,
3593- bool Predicated,
35943580 const TargetTransformInfo &TTI) {
35953581 if (VF == 1 )
35963582 return 0 ;
@@ -3602,7 +3588,7 @@ static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
36023588 for (unsigned OpInd = 0 ; OpInd < OperandsNum; ++OpInd)
36033589 OpTys.push_back (ToVectorTy (I->getOperand (OpInd)->getType (), VF));
36043590
3605- return getScalarizationOverhead (OpTys, RetTy, Predicated, TTI);
3591+ return getScalarizationOverhead (OpTys, RetTy, TTI);
36063592}
36073593
36083594// Estimate cost of a call instruction CI if it were vectorized with factor VF.
@@ -3635,7 +3621,7 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
36353621
36363622 // Compute costs of unpacking argument values for the scalar calls and
36373623 // packing the return values to a vector.
3638- unsigned ScalarizationCost = getScalarizationOverhead (Tys, RetTy, false , TTI);
3624+ unsigned ScalarizationCost = getScalarizationOverhead (Tys, RetTy, TTI);
36393625
36403626 unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
36413627
@@ -6536,10 +6522,27 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
65366522 // vector lane. Get the scalarization cost and scale this amount by the
65376523 // probability of executing the predicated block. If the instruction is not
65386524 // predicated, we fall through to the next case.
6539- if (VF > 1 && Legal->isScalarWithPredication (I))
6540- return VF * TTI.getArithmeticInstrCost (I->getOpcode (), RetTy) /
6541- getReciprocalPredBlockProb () +
6542- getScalarizationOverhead (I, VF, true , TTI);
6525+ if (VF > 1 && Legal->isScalarWithPredication (I)) {
6526+ unsigned Cost = 0 ;
6527+
6528+ // These instructions have a non-void type, so account for the phi nodes
6529+ // that we will create. This cost is likely to be zero. The phi node
6530+ // cost, if any, should be scaled by the block probability because it
6531+ // models a copy at the end of each predicated block.
6532+ Cost += VF * TTI.getCFInstrCost (Instruction::PHI);
6533+
6534+ // The cost of the non-predicated instruction.
6535+ Cost += VF * TTI.getArithmeticInstrCost (I->getOpcode (), RetTy);
6536+
6537+ // The cost of insertelement and extractelement instructions needed for
6538+ // scalarization.
6539+ Cost += getScalarizationOverhead (I, VF, TTI);
6540+
6541+ // Scale the cost by the probability of executing the predicated blocks.
6542+ // This assumes the predicated block for each vector lane is equally
6543+ // likely.
6544+ return Cost / getReciprocalPredBlockProb ();
6545+ }
65436546 case Instruction::Add:
65446547 case Instruction::FAdd:
65456548 case Instruction::Sub:
@@ -6695,7 +6698,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
66956698
66966699 // Get the overhead of the extractelement and insertelement instructions
66976700 // we might create due to scalarization.
6698- Cost += getScalarizationOverhead (I, VF, false , TTI);
6701+ Cost += getScalarizationOverhead (I, VF, TTI);
66996702
67006703 return Cost;
67016704 }
@@ -6782,7 +6785,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
67826785 // The cost of executing VF copies of the scalar instruction. This opcode
67836786 // is unknown. Assume that it is the same as 'mul'.
67846787 return VF * TTI.getArithmeticInstrCost (Instruction::Mul, VectorTy) +
6785- getScalarizationOverhead (I, VF, false , TTI);
6788+ getScalarizationOverhead (I, VF, TTI);
67866789 } // end of switch.
67876790}
67886791
0 commit comments