diff --git a/llvm/include/llvm/Support/InstructionCost.h b/llvm/include/llvm/Support/InstructionCost.h
--- a/llvm/include/llvm/Support/InstructionCost.h
+++ b/llvm/include/llvm/Support/InstructionCost.h
@@ -244,7 +244,7 @@
   template <class Function>
   auto map(const Function &F) const -> InstructionCost {
     if (isValid())
-      return F(*getValue());
+      return F(Value);
     return getInvalid();
   }
 };
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -552,7 +552,7 @@
     // inlined so that we shouldn't specialize it.
     if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
         (!ForceFunctionSpecialization &&
-         *Metrics.NumInsts.getValue() < SmallFunctionThreshold)) {
+         Metrics.NumInsts < SmallFunctionThreshold)) {
       InstructionCost C{};
       C.setInvalid();
       return C;
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -1353,16 +1353,13 @@
   if (Cloner.OutlinedFunctions.empty())
     return false;
 
-  int SizeCost = 0;
-  BlockFrequency WeightedRcost;
-  int NonWeightedRcost;
-
   auto OutliningCosts = computeOutliningCosts(Cloner);
-  assert(std::get<0>(OutliningCosts).isValid() &&
-         std::get<1>(OutliningCosts).isValid() && "Expected valid costs");
 
-  SizeCost = *std::get<0>(OutliningCosts).getValue();
-  NonWeightedRcost = *std::get<1>(OutliningCosts).getValue();
+  InstructionCost SizeCost = std::get<0>(OutliningCosts);
+  InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
+
+  assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
+         "Expected valid costs");
 
   // Only calculate RelativeToEntryFreq when we are doing single region
   // outlining.
@@ -1377,7 +1374,8 @@
     // execute the calls to outlined functions.
     RelativeToEntryFreq = BranchProbability(0, 1);
 
-  WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
+  BlockFrequency WeightedRcost =
+      BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
 
   // The call sequence(s) to the outlined function(s) are larger than the sum of
   // the original outlined region size(s), it does not increase the chances of
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -840,7 +840,7 @@
     }
   }
 
-  unsigned DuplicationCost = 0;
+  InstructionCost DuplicationCost = 0;
 
   unsigned JumpTableSize = 0;
   TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
@@ -851,7 +851,7 @@
     // using binary search, hence the LogBase2().
     unsigned CondBranches =
         APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
-    DuplicationCost = *Metrics.NumInsts.getValue() / CondBranches;
+    DuplicationCost = Metrics.NumInsts / CondBranches;
   } else {
     // Compared with jump tables, the DFA optimizer removes an indirect branch
     // on each loop iteration, thus making branch prediction more precise. The
@@ -859,7 +859,7 @@
     // predictor to make a mistake, and the more benefit there is in the DFA
    // optimizer. Thus, the more branch targets there are, the lower is the
    // cost of the DFA opt.
-    DuplicationCost = *Metrics.NumInsts.getValue() / JumpTableSize;
+    DuplicationCost = Metrics.NumInsts / JumpTableSize;
   }
 
   LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
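
Note on the arithmetic above: InstructionCost overloads the usual operators so
that an invalid cost poisons any expression it appears in, which is what lets
"Metrics.NumInsts / JumpTableSize" replace the optional dereference. A minimal
standalone sketch of that propagation (a simplified model for illustration,
not the real llvm/Support/InstructionCost.h; the payload type and names here
are made up):

    #include <cassert>
    #include <optional>

    class CostSketch {
      long long Value;
      bool Valid;

    public:
      explicit CostSketch(long long V) : Value(V), Valid(true) {}
      static CostSketch getInvalid() {
        CostSketch C(0);
        C.Valid = false;
        return C;
      }
      bool isValid() const { return Valid; }
      std::optional<long long> getValue() const {
        return Valid ? std::optional<long long>(Value) : std::nullopt;
      }
      // Division propagates invalidity instead of asserting, mirroring the
      // behaviour the DuplicationCost computations rely on.
      CostSketch operator/(const CostSketch &RHS) const {
        if (!Valid || !RHS.Valid)
          return getInvalid();
        return CostSketch(Value / RHS.Value);
      }
    };

    int main() {
      CostSketch NumInsts(12);
      CostSketch PerTarget = NumInsts / CostSketch(4); // stays in cost domain
      assert(PerTarget.isValid() && *PerTarget.getValue() == 3);
      CostSketch Poisoned = CostSketch::getInvalid() / CostSketch(4);
      assert(!Poisoned.isValid()); // no optional dereference required
      return 0;
    }

Callers still decide what an invalid result means; the operators only carry
the state intact through the computation.
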
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -682,7 +682,7 @@
   // that each loop has at least three instructions (likely a conditional
   // branch, a comparison feeding that branch, and some kind of loop increment
   // feeding that comparison instruction).
-  if (LoopSize.isValid() && *LoopSize.getValue() < BEInsns + 1)
+  if (LoopSize.isValid() && LoopSize < BEInsns + 1)
     // This is an open coded max() on InstructionCost
     LoopSize = BEInsns + 1;
 
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -316,7 +316,7 @@
                  L->dump());
       return Rotated;
     }
-    if (*Metrics.NumInsts.getValue() > MaxHeaderSize) {
+    if (Metrics.NumInsts > MaxHeaderSize) {
       LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains "
                         << Metrics.NumInsts
                         << " instructions, which is more than the threshold ("
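
Note on the mixed comparisons above ("LoopSize < BEInsns + 1",
"Metrics.NumInsts > MaxHeaderSize"): the plain-integer side is implicitly
converted to a valid InstructionCost, and the ordering appears to compare
validity before value, so an invalid cost ranks above every valid one; that
would explain why LoopUnrollPass keeps the explicit isValid() guard in front
of its open-coded max(). A sketch of that ordering, under the stated
assumption about validity sorting first (again a simplified model, not the
real class):

    #include <cassert>

    struct OrderedCost {
      long long Value;
      bool Valid;
      // Validity is compared first: a valid cost sorts below an invalid one.
      bool operator<(const OrderedCost &RHS) const {
        if (Valid != RHS.Valid)
          return Valid;
        return Value < RHS.Value;
      }
    };

    int main() {
      OrderedCost LoopSize{2, true};
      OrderedCost Floor{3, true}; // stands in for BEInsns + 1
      OrderedCost Invalid{0, false};
      assert(LoopSize < Floor);   // plain value ordering when both are valid
      assert(Floor < Invalid);    // invalid ranks above every valid cost
      assert(!(Invalid < Floor)); // so "< Floor" never fires for an invalid size
      return 0;
    }
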
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1182,7 +1182,7 @@
   /// If interleave count has been specified by metadata it will be returned.
   /// Otherwise, the interleave count is computed and returned. VF and LoopCost
   /// are the selected vectorization factor and the cost of the selected VF.
-  unsigned selectInterleaveCount(ElementCount VF, unsigned LoopCost);
+  unsigned selectInterleaveCount(ElementCount VF, InstructionCost LoopCost);
 
   /// Memory access instruction may be vectorized in more than one way.
   /// Form of instruction after vectorization depends on cost.
@@ -1701,8 +1701,9 @@
   /// scalarize and their scalar costs are collected in \p ScalarCosts. A
   /// non-negative return value implies the expression will be scalarized.
   /// Currently, only single-use chains are considered for scalarization.
-  int computePredInstDiscount(Instruction *PredInst, ScalarCostsTy &ScalarCosts,
-                              ElementCount VF);
+  InstructionCost computePredInstDiscount(Instruction *PredInst,
+                                          ScalarCostsTy &ScalarCosts,
+                                          ElementCount VF);
 
   /// Collect the instructions that are uniform after vectorization. An
   /// instruction is uniform if we represent it with a single scalar value in
@@ -5636,8 +5637,9 @@
   }
 }
 
-unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
-                                                           unsigned LoopCost) {
+unsigned
+LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
+                                                  InstructionCost LoopCost) {
   // -- The interleave heuristics --
   // We interleave the loop in order to expose ILP and reduce the loop overhead.
   // There are many micro-architectural considerations that we can't predict
@@ -5673,9 +5675,8 @@
   // If we did not calculate the cost for VF (because the user selected the VF)
   // then we calculate the cost of VF here.
   if (LoopCost == 0) {
-    InstructionCost C = expectedCost(VF).first;
-    assert(C.isValid() && "Expected to have chosen a VF with valid cost");
-    LoopCost = *C.getValue();
+    LoopCost = expectedCost(VF).first;
+    assert(LoopCost.isValid() && "Expected to have chosen a VF with valid cost");
 
     // Loop body is free and there is no need for interleaving.
     if (LoopCost == 0)
@@ -5803,8 +5804,8 @@
     // We assume that the cost overhead is 1 and we use the cost model
     // to estimate the cost of the loop and interleave until the cost of the
     // loop overhead is about 5% of the cost of the loop.
-    unsigned SmallIC =
-        std::min(IC, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost));
+    unsigned SmallIC = std::min(
+        IC, (unsigned)PowerOf2Floor(SmallLoopCost / *LoopCost.getValue()));
 
     // Interleave until store/load ports (estimated by max interleave count) are
     // saturated.
@@ -6130,7 +6131,7 @@
   }
 }
 
-int LoopVectorizationCostModel::computePredInstDiscount(
+InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
     Instruction *PredInst, ScalarCostsTy &ScalarCosts, ElementCount VF) {
   assert(!isUniformAfterVectorization(PredInst, VF) &&
          "Instruction marked uniform-after-vectorization will be predicated");
@@ -6239,7 +6240,7 @@
     ScalarCosts[I] = ScalarCost;
   }
 
-  return *Discount.getValue();
+  return Discount;
 }
 
 LoopVectorizationCostModel::VectorizationCostTy
@@ -10305,7 +10306,7 @@
   if (MaybeVF) {
     VF = *MaybeVF;
     // Select the interleave count.
-    IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
+    IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
     unsigned SelectedIC = std::max(IC, UserIC);
 
     // Optimistically generate runtime checks if they are needed. Drop them if
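
One boundary remains in the patch: where a cost has to leave the
InstructionCost domain for a plain-integer API (the BlockFrequency
constructor in PartialInlining, the PowerOf2Floor division in
selectInterleaveCount), the assert-then-dereference idiom is kept rather than
removed. A small sketch of that pattern (illustrative names only, not code
from the patch):

    #include <cassert>
    #include <cstdint>
    #include <optional>

    struct BoundaryCost {
      std::optional<int64_t> Payload;
      bool isValid() const { return Payload.has_value(); }
      std::optional<int64_t> getValue() const { return Payload; }
    };

    // Single guarded escape hatch into the plain-integer domain.
    int64_t toPlainInt(const BoundaryCost &C) {
      assert(C.isValid() && "cost must be valid before leaving the cost domain");
      return *C.getValue();
    }

    int main() {
      BoundaryCost LoopCost{40};
      int64_t SmallLoopCost = 20; // cf. the small-loop budget above
      int64_t Ratio = SmallLoopCost / toPlainInt(LoopCost);
      assert(Ratio == 0);         // a 40-unit loop exceeds the 20-unit budget
      return 0;
    }

Keeping the dereference behind a prior isValid() assert confines the optional
unwrapping to the few places where an integer is genuinely required.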