Diff 315727

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 124 Lines • ▼ Show 20 Lines

#include "llvm/IR/Verifier.h" #include "llvm/IR/Verifier.h"

#include "llvm/InitializePasses.h" #include "llvm/InitializePasses.h"

#include "llvm/Pass.h" #include "llvm/Pass.h"

#include "llvm/Support/Casting.h" #include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"

#include "llvm/Support/Compiler.h" #include "llvm/Support/Compiler.h"

#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/InstructionCost.h"

#include "llvm/Support/MathExtras.h" #include "llvm/Support/MathExtras.h"

#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/InjectTLIMappings.h" #include "llvm/Transforms/Utils/InjectTLIMappings.h"

#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopSimplify.h"

#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h"

#include "llvm/Transforms/Utils/LoopVersioning.h" #include "llvm/Transforms/Utils/LoopVersioning.h"

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

▲ Show 20 Lines • Show All 1,489 Lines • ▼ Show 20 Lines private:

/// The vectorization cost is a combination of the cost itself and a boolean /// The vectorization cost is a combination of the cost itself and a boolean

/// indicating whether any of the contributing operations will actually /// indicating whether any of the contributing operations will actually

/// operate on /// operate on

/// vector values after type legalization in the backend. If this latter value /// vector values after type legalization in the backend. If this latter value

/// is /// is

/// false, then all operations will be scalarized (i.e. no vectorization has /// false, then all operations will be scalarized (i.e. no vectorization has

/// actually taken place). /// actually taken place).

using VectorizationCostTy = std::pair<unsigned, bool>; using VectorizationCostTy = std::pair<InstructionCost, bool>;

/// Returns the expected execution cost. The unit of the cost does /// Returns the expected execution cost. The unit of the cost does

/// not matter because we use the 'cost' units to compare different /// not matter because we use the 'cost' units to compare different

/// vector widths. The cost that is returned is *not* normalized by /// vector widths. The cost that is returned is *not* normalized by

/// the factor width. /// the factor width.

VectorizationCostTy expectedCost(ElementCount VF); VectorizationCostTy expectedCost(ElementCount VF);

/// Returns the execution time cost of an instruction for a given vector /// Returns the execution time cost of an instruction for a given vector

/// width. Vector width of one means scalar. /// width. Vector width of one means scalar.

VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);

/// The cost-computation logic from getInstructionCost which provides /// The cost-computation logic from getInstructionCost which provides

/// the vector type as an output parameter. /// the vector type as an output parameter.

unsigned getInstructionCost(Instruction *I, ElementCount VF, Type *&VectorTy); InstructionCost getInstructionCost(Instruction *I, ElementCount VF,

Type *&VectorTy);

/// Calculate vectorization cost of memory instruction \p I. /// Calculate vectorization cost of memory instruction \p I.

unsigned getMemoryInstructionCost(Instruction *I, ElementCount VF); unsigned getMemoryInstructionCost(Instruction *I, ElementCount VF);

/// The cost computation for scalarized memory instruction. /// The cost computation for scalarized memory instruction.

unsigned getMemInstScalarizationCost(Instruction *I, ElementCount VF); unsigned getMemInstScalarizationCost(Instruction *I, ElementCount VF);

/// The cost computation for interleaving group of memory instructions. /// The cost computation for interleaving group of memory instructions.

Show All 27 Lines private:

/// Map of scalar integer values to the smallest bitwidth they can be legally /// Map of scalar integer values to the smallest bitwidth they can be legally

/// represented as. The vector equivalents of these values should be truncated /// represented as. The vector equivalents of these values should be truncated

/// to this type. /// to this type.

MapVector<Instruction *, uint64_t> MinBWs; MapVector<Instruction *, uint64_t> MinBWs;

/// A type representing the costs for instructions if they were to be /// A type representing the costs for instructions if they were to be

/// scalarized rather than vectorized. The entries are Instruction-Cost /// scalarized rather than vectorized. The entries are Instruction-Cost

/// pairs. /// pairs.

using ScalarCostsTy = DenseMap<Instruction *, unsigned>; using ScalarCostsTy = DenseMap<Instruction *, InstructionCost>;

/// A set containing all BasicBlocks that are known to be present after /// A set containing all BasicBlocks that are known to be present after

/// vectorization as a predicated block. /// vectorization as a predicated block.

SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization; SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization;

/// Records whether it is allowed to have the original scalar loop execute at /// Records whether it is allowed to have the original scalar loop execute at

/// least once. This may be needed as a fallback loop in case runtime /// least once. This may be needed as a fallback loop in case runtime

/// aliasing/dependence checks fail, or to handle the tail/remainder /// aliasing/dependence checks fail, or to handle the tail/remainder

▲ Show 20 Lines • Show All 4,049 Lines • ▼ Show 20 Lines

VectorizationFactor VectorizationFactor

LoopVectorizationCostModel::selectVectorizationFactor(ElementCount MaxVF) { LoopVectorizationCostModel::selectVectorizationFactor(ElementCount MaxVF) {

// FIXME: This can be fixed for scalable vectors later, because at this stage // FIXME: This can be fixed for scalable vectors later, because at this stage

// the LoopVectorizer will only consider vectorizing a loop with scalable // the LoopVectorizer will only consider vectorizing a loop with scalable

// vectors when the loop has a hint to enable vectorization for a given VF. // vectors when the loop has a hint to enable vectorization for a given VF.

assert(!MaxVF.isScalable() && "scalable vectors not yet supported"); assert(!MaxVF.isScalable() && "scalable vectors not yet supported");

float Cost = expectedCost(ElementCount::getFixed(1)).first; InstructionCost ExpectedCost = expectedCost(ElementCount::getFixed(1)).first;

const float ScalarCost = Cost; LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");

assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop");

sdesmalenUnsubmitted

Not Done

nit: ExpectedScalarCost ?

sdesmalen: nit: `ExpectedScalarCost` ?

unsigned Width = 1; unsigned Width = 1;

LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n"); const float ScalarCost = *ExpectedCost.getValue();

float Cost = ScalarCost;

bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;

if (ForceVectorization && MaxVF.isVector()) { if (ForceVectorization && MaxVF.isVector()) {

// Ignore scalar width, because the user explicitly wants vectorization. // Ignore scalar width, because the user explicitly wants vectorization.

// Initialize cost to max so that VF = 2 is, at least, chosen during cost // Initialize cost to max so that VF = 2 is, at least, chosen during cost

// evaluation. // evaluation.

Cost = std::numeric_limits<float>::max(); Cost = std::numeric_limits<float>::max();

} }

for (unsigned i = 2; i <= MaxVF.getFixedValue(); i *= 2) { for (unsigned i = 2; i <= MaxVF.getFixedValue(); i *= 2) {

// Notice that the vector loop needs to be executed less times, so // Notice that the vector loop needs to be executed less times, so

// we need to divide the cost of the vector loops by the width of // we need to divide the cost of the vector loops by the width of

// the vector elements. // the vector elements.

VectorizationCostTy C = expectedCost(ElementCount::getFixed(i)); VectorizationCostTy C = expectedCost(ElementCount::getFixed(i));

float VectorCost = C.first / (float)i; assert(C.first.isValid() && "Unexpected invalid cost for vector loop");

float VectorCost = *C.first.getValue() / (float)i;

ctetreauUnsubmitted

Done

VectorizationCostTy C = expectedCost(ElementCount::getFixed(i));

- std::pair<InstructionCost, unsigned> VectorCost = {C.first, i};

+ if (!C.first.isValid())

+ break; // or continue if we expect that a bigger vector might somehow be valid

+ InstructionCost::CostType VectorCost = *(C.first.getValue());

LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i

I think you can get rid of these pairs if you just check validity before assigning it.

ctetreau: I think you can get rid of these pairs if you just check validity before assigning it.

david-armAuthorUnsubmitted

Done

OK. I guess the problem is that we could start off with an invalid cost (ScalarCost) and get another invalid cost in the loop too. I think we could get rid of the pairs, but it just makes the main loop more complicated, that's all. What I could do is add this in the main loop:

if (!C.first.isValid())
  continue
if (!ScalarCost.isValid())
  MinCost = C.first;
// Both costs are now valid.
// ... use MinCost and C somehow ...

The other thing I was trying to do is move from using a float to represent the MinCost in the loop, to using an InstructionCost (integer based) instead. If I get rid of the pairs then it makes more sense to revert to using a float for MinCost I think, since then division (and hence a fractional cost) is involved.

I can try adding control flow to the loop like above?

david-arm: OK. I guess the problem is that we could start off with an invalid cost (ScalarCost) and get…

ctetreauUnsubmitted

Done

So I messed around with your patch, and came up with this loop body:

for (unsigned i = 2; i <= MaxVF.getFixedValue(); i *= 2) {
    // Notice that the vector loop needs to be executed less times, so
    // we need to divide the cost of the vector loops by the width of
    // the vector elements.
    VectorizationCostTy C = expectedCost(ElementCount::getFixed(i));

    Optional<float> VectorCost =
        C.first.getValue().map([i](InstructionCost::CostType Cost) {
          return static_cast<float>(Cost) / i;
        });

    // removed debug output noise

    if (auto MinCostVal = MinCost.getValue())
      if (VectorCost && *VectorCost < *MinCostVal) {
        Width = i;
        // requires ctor that takes an Optional, and ctors that convert from number like things
        MinCost = VectorCost;
      }
  }

I haven't run the tests, but this compiles and I think it should work.

ctetreau: So I messed around with your patch, and came up with this loop body: ``` for (unsigned i = 2…

david-armAuthorUnsubmitted

Done

Hi @ctetreau, your suggestions for adding a conversion constructor/getter in general seem sensible to me, however I just have one question. In that example above it looks like you're rounding a float (VectorCost) to an integer, which is a change in behaviour I think? Currently the examples I'm thinking of are where we compare a cost of 11 for a width of 2, with a cost of 21 for a width of 4, i.e.

float(11)/2 = 5.5
float(21)/4 = 5.25

Previously we'd choose a width of 4 because 5.25 is less than 5.5. I think with your proposal we'd keep a width of 2 because 5.5 gets rounded down to 5. Unless we make MinCost use floats too, in which case the need for conversion goes away I think?

david-arm: Hi @ctetreau, your suggestions for adding a conversion constructor/getter in general seem…

ctetreauUnsubmitted

Done

Yeah, looks like I messed up. If you have MinCost be an Optional<float>, it seems like it should be fine. The main idea is that this whole function should be working with Optional<float> instead of InstructionCost. We can convert all InstructionCost objects to Optional<float> right away, do floating point math with them, and then convert back to unsigned for the return.

ctetreau: Yeah, looks like I messed up. If you have MinCost be an Optional<float>, it seems like it…

david-armAuthorUnsubmitted

Done

Hi @ctetreau, I've changed my patch now to avoid worrying about validity of the cost. Since we now assert the cost is valid we no longer need to use Optionals and so on.

david-arm: Hi @ctetreau, I've changed my patch now to avoid worrying about validity of the cost. Since we…

LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i

<< " costs: " << (int)VectorCost << ".\n"); << " costs: " << (int)VectorCost << ".\n");

if (!C.second && !ForceVectorization) { if (!C.second && !ForceVectorization) {

ctetreauUnsubmitted

Done

ForceVectorization needs to not blow up in the face of invalid InstructionCosts

ctetreau: ForceVectorization needs to not blow up in the face of invalid `InstructionCost`s

LLVM_DEBUG( LLVM_DEBUG(

dbgs() << "LV: Not considering vector loop of width " << i dbgs() << "LV: Not considering vector loop of width " << i

<< " because it will not generate any vector instructions.\n"); << " because it will not generate any vector instructions.\n");

continue; continue;

} }

// If profitable add it to ProfitableVF list. // If profitable add it to ProfitableVF list.

if (VectorCost < ScalarCost) { if (VectorCost < ScalarCost) {

▲ Show 20 Lines • Show All 322 Lines • ▼ Show 20 Lines unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,

else else

// Make sure IC is greater than 0. // Make sure IC is greater than 0.

IC = std::max(1u, IC); IC = std::max(1u, IC);

assert(IC > 0 && "Interleave count must be greater than 0."); assert(IC > 0 && "Interleave count must be greater than 0.");

// If we did not calculate the cost for VF (because the user selected the VF) // If we did not calculate the cost for VF (because the user selected the VF)

// then we calculate the cost of VF here. // then we calculate the cost of VF here.

if (LoopCost == 0) if (LoopCost == 0) {

LoopCost = expectedCost(VF).first; assert(expectedCost(VF).first.isValid() && "Expected a valid cost");

LoopCost = *expectedCost(VF).first.getValue();

}

assert(LoopCost && "Non-zero loop cost expected"); assert(LoopCost && "Non-zero loop cost expected");

// Interleave if we vectorized this loop and there is a reduction that could // Interleave if we vectorized this loop and there is a reduction that could

// benefit from interleaving. // benefit from interleaving.

if (VF.isVector() && HasReductions) { if (VF.isVector() && HasReductions) {

LLVM_DEBUG(dbgs() << "LV: Interleaving because of reductions.\n"); LLVM_DEBUG(dbgs() << "LV: Interleaving because of reductions.\n");

return IC; return IC;

▲ Show 20 Lines • Show All 305 Lines • ▼ Show 20 Lines for (Instruction &I : *BB)

ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end()); ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());

// Remember that BB will remain after vectorization. // Remember that BB will remain after vectorization.

PredicatedBBsAfterVectorization.insert(BB); PredicatedBBsAfterVectorization.insert(BB);

} }

int LoopVectorizationCostModel::computePredInstDiscount( int LoopVectorizationCostModel::computePredInstDiscount(

Instruction *PredInst, DenseMap<Instruction *, unsigned> &ScalarCosts, Instruction *PredInst, ScalarCostsTy &ScalarCosts, ElementCount VF) {

ElementCount VF) {

assert(!isUniformAfterVectorization(PredInst, VF) && assert(!isUniformAfterVectorization(PredInst, VF) &&

"Instruction marked uniform-after-vectorization will be predicated"); "Instruction marked uniform-after-vectorization will be predicated");

// Initialize the discount to zero, meaning that the scalar version and the // Initialize the discount to zero, meaning that the scalar version and the

// vector version cost the same. // vector version cost the same.

int Discount = 0; InstructionCost Discount = 0;

// Holds instructions to analyze. The instructions we visit are mapped in // Holds instructions to analyze. The instructions we visit are mapped in

// ScalarCosts. Those instructions are the ones that would be scalarized if // ScalarCosts. Those instructions are the ones that would be scalarized if

// we find that the scalar version costs less. // we find that the scalar version costs less.

SmallVector<Instruction *, 8> Worklist; SmallVector<Instruction *, 8> Worklist;

// Returns true if the given instruction can be scalarized. // Returns true if the given instruction can be scalarized.

auto canBeScalarized = [&](Instruction *I) -> bool { auto canBeScalarized = [&](Instruction *I) -> bool {

Show All 38 Lines while (!Worklist.empty()) {

Instruction *I = Worklist.pop_back_val(); Instruction *I = Worklist.pop_back_val();

// If we've already analyzed the instruction, there's nothing to do. // If we've already analyzed the instruction, there's nothing to do.

if (ScalarCosts.find(I) != ScalarCosts.end()) if (ScalarCosts.find(I) != ScalarCosts.end())

continue; continue;

// Compute the cost of the vector instruction. Note that this cost already // Compute the cost of the vector instruction. Note that this cost already

// includes the scalarization overhead of the predicated instruction. // includes the scalarization overhead of the predicated instruction.

unsigned VectorCost = getInstructionCost(I, VF).first; InstructionCost VectorCost = getInstructionCost(I, VF).first;

// Compute the cost of the scalarized instruction. This cost is the cost of // Compute the cost of the scalarized instruction. This cost is the cost of

// the instruction as if it wasn't if-converted and instead remained in the // the instruction as if it wasn't if-converted and instead remained in the

// predicated block. We will scale this cost by block probability after // predicated block. We will scale this cost by block probability after

// computing the scalarization overhead. // computing the scalarization overhead.

assert(!VF.isScalable() && "scalable vectors not yet supported."); assert(!VF.isScalable() && "scalable vectors not yet supported.");

unsigned ScalarCost = InstructionCost ScalarCost =

VF.getKnownMinValue() * VF.getKnownMinValue() *

getInstructionCost(I, ElementCount::getFixed(1)).first; getInstructionCost(I, ElementCount::getFixed(1)).first;

// Compute the scalarization overhead of needed insertelement instructions // Compute the scalarization overhead of needed insertelement instructions

// and phi nodes. // and phi nodes.

if (isScalarWithPredication(I) && !I->getType()->isVoidTy()) { if (isScalarWithPredication(I) && !I->getType()->isVoidTy()) {

ScalarCost += TTI.getScalarizationOverhead( ScalarCost += TTI.getScalarizationOverhead(

cast<VectorType>(ToVectorTy(I->getType(), VF)), cast<VectorType>(ToVectorTy(I->getType(), VF)),

Show All 26 Lines while (!Worklist.empty()) {

ScalarCost /= getReciprocalPredBlockProb(); ScalarCost /= getReciprocalPredBlockProb();

// Compute the discount. A non-negative discount means the vector version // Compute the discount. A non-negative discount means the vector version

// of the instruction costs more, and scalarizing would be beneficial. // of the instruction costs more, and scalarizing would be beneficial.

Discount += VectorCost - ScalarCost; Discount += VectorCost - ScalarCost;

ScalarCosts[I] = ScalarCost; ScalarCosts[I] = ScalarCost;

} }

return Discount; return *Discount.getValue();

} }

LoopVectorizationCostModel::VectorizationCostTy LoopVectorizationCostModel::VectorizationCostTy

LoopVectorizationCostModel::expectedCost(ElementCount VF) { LoopVectorizationCostModel::expectedCost(ElementCount VF) {

VectorizationCostTy Cost; VectorizationCostTy Cost;

// For each block. // For each block.

for (BasicBlock *BB : TheLoop->blocks()) { for (BasicBlock *BB : TheLoop->blocks()) {

VectorizationCostTy BlockCost; VectorizationCostTy BlockCost;

// For each instruction in the old loop. // For each instruction in the old loop.

for (Instruction &I : BB->instructionsWithoutDebug()) { for (Instruction &I : BB->instructionsWithoutDebug()) {

// Skip ignored values. // Skip ignored values.

if (ValuesToIgnore.count(&I) || if (ValuesToIgnore.count(&I) ||

(VF.isVector() && VecValuesToIgnore.count(&I))) (VF.isVector() && VecValuesToIgnore.count(&I)))

continue; continue;

VectorizationCostTy C = getInstructionCost(&I, VF); VectorizationCostTy C = getInstructionCost(&I, VF);

// Check if we should override the cost. // Check if we should override the cost.

if (ForceTargetInstructionCost.getNumOccurrences() > 0) if (ForceTargetInstructionCost.getNumOccurrences() > 0)

C.first = ForceTargetInstructionCost; C.first = InstructionCost(ForceTargetInstructionCost);

BlockCost.first += C.first; BlockCost.first += C.first;

BlockCost.second |= C.second; BlockCost.second |= C.second;

LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first

<< " for VF " << VF << " For instruction: " << I << " for VF " << VF << " For instruction: " << I

<< '\n'); << '\n');

} }

▲ Show 20 Lines • Show All 235 Lines • ▼ Show 20 Lines if (VF.isVector() && ForcedScalar != ForcedScalars.end()) {

if (InstSet.count(I)) if (InstSet.count(I))

return VectorizationCostTy( return VectorizationCostTy(

(getInstructionCost(I, ElementCount::getFixed(1)).first * (getInstructionCost(I, ElementCount::getFixed(1)).first *

VF.getKnownMinValue()), VF.getKnownMinValue()),

false); false);

} }

Type *VectorTy; Type *VectorTy;

unsigned C = getInstructionCost(I, VF, VectorTy); InstructionCost C = getInstructionCost(I, VF, VectorTy);

bool TypeNotScalarized = bool TypeNotScalarized =

VF.isVector() && VectorTy->isVectorTy() && VF.isVector() && VectorTy->isVectorTy() &&

TTI.getNumberOfParts(VectorTy) < VF.getKnownMinValue(); TTI.getNumberOfParts(VectorTy) < VF.getKnownMinValue();

return VectorizationCostTy(C, TypeNotScalarized); return VectorizationCostTy(C, TypeNotScalarized);

} }

unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,

▲ Show 20 Lines • Show All 177 Lines • ▼ Show 20 Lines if (isa<LoadInst>(I)) {

} }

} else } else

// Make sure I gets scalarized and a cost estimate without // Make sure I gets scalarized and a cost estimate without

// scalarization overhead. // scalarization overhead.

ForcedScalars[VF].insert(I); ForcedScalars[VF].insert(I);

} }

unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, InstructionCost

ElementCount VF, LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,

Type *&VectorTy) { Type *&VectorTy) {

Type *RetTy = I->getType(); Type *RetTy = I->getType();

if (canTruncateToMinimalBitwidth(I, VF)) if (canTruncateToMinimalBitwidth(I, VF))

RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);

VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF); VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF);

auto SE = PSE.getSE(); auto SE = PSE.getSE();

TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

// TODO: We need to estimate the cost of intrinsic calls. // TODO: We need to estimate the cost of intrinsic calls.

▲ Show 20 Lines • Show All 258 Lines • ▼ Show 20 Lines LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,

case Instruction::Call: { case Instruction::Call: {

bool NeedToScalarize; bool NeedToScalarize;

CallInst *CI = cast<CallInst>(I); CallInst *CI = cast<CallInst>(I);

unsigned CallCost = getVectorCallCost(CI, VF, NeedToScalarize); unsigned CallCost = getVectorCallCost(CI, VF, NeedToScalarize);

if (getVectorIntrinsicIDForCall(CI, TLI)) if (getVectorIntrinsicIDForCall(CI, TLI))

return std::min(CallCost, getVectorIntrinsicCost(CI, VF)); return std::min(CallCost, getVectorIntrinsicCost(CI, VF));

return CallCost; return CallCost;

} }

case Instruction::ExtractValue: { case Instruction::ExtractValue:

InstructionCost ExtractCost = return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput);

TTI.getInstructionCost(I, TTI::TCK_RecipThroughput);

assert(ExtractCost.isValid() && "Invalid cost for ExtractValue");

return *(ExtractCost.getValue());

}

default: default:

// The cost of executing VF copies of the scalar instruction. This opcode // The cost of executing VF copies of the scalar instruction. This opcode

// is unknown. Assume that it is the same as 'mul'. // is unknown. Assume that it is the same as 'mul'.

return VF.getKnownMinValue() * TTI.getArithmeticInstrCost( return VF.getKnownMinValue() * TTI.getArithmeticInstrCost(

Instruction::Mul, VectorTy, CostKind) + Instruction::Mul, VectorTy, CostKind) +

getScalarizationOverhead(I, VF); getScalarizationOverhead(I, VF);

} // end of switch. } // end of switch.

} }

▲ Show 20 Lines • Show All 2,234 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[NFC][InstructionCost] Change LoopVectorizationCostModel::getInstructionCost to return InstructionCost
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 315727

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[NFC][InstructionCost] Change LoopVectorizationCostModel::getInstructionCost to return InstructionCost
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 315727

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

[NFC][InstructionCost] Change LoopVectorizationCostModel::getInstructionCost to return InstructionCost
ClosedPublic