diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -6,6 +6,7 @@
   Vectorize.cpp
   VectorCombine.cpp
   VPlan.cpp
+  VPlanCostModel.cpp
   VPlanHCFGBuilder.cpp
   VPlanRecipes.cpp
   VPlanSLP.cpp
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -57,6 +57,7 @@
 #include "LoopVectorizationPlanner.h"
 #include "VPRecipeBuilder.h"
 #include "VPlan.h"
+#include "VPlanCostModel.h"
 #include "VPlanHCFGBuilder.h"
 #include "VPlanTransforms.h"
 #include "llvm/ADT/APInt.h"
@@ -363,6 +364,11 @@
              "support for outer loop vectorization."));
 }
 
+cl::opt<bool> CostUsingVPlan("vplan-use-vplan-cost-model", cl::init(false),
+                             cl::Hidden,
+                             cl::desc("Enable VPlan based costing path. To "
+                                      "become the default in the future."));
+
 // This flag enables the stress testing of the VPlan H-CFG construction in the
 // VPlan-native vectorization path. It must be used in conjuction with
 // -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
@@ -1171,6 +1177,9 @@
 /// TargetTransformInfo to query the different backends for the cost of
 /// different operations.
 class LoopVectorizationCostModel {
+  // VPlanCostModel delegates per-instruction costing to this class.
+  friend class VPlanCostModel;
+
 public:
   LoopVectorizationCostModel(ScalarEpilogueLowering SEL, Loop *L,
                              PredicatedScalarEvolution &PSE, LoopInfo *LI,
@@ -8648,6 +8657,26 @@
   return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
 }
 
+// Defined here rather than in VPlanCostModel.cpp because it needs the
+// definition of LoopVectorizationCostModel, which lives in this file.
+Type *VPlanCostModel::truncateToMinimalBitwidth(Type *ValTy,
+                                                Instruction *I) const {
+  // Bind by reference and look the key up once: a by-value `auto` would
+  // materialise a local map on every call.
+  const auto &MinBWs = CM.getMinimalBitwidths();
+  auto It = MinBWs.find(I);
+  if (It != MinBWs.end())
+    ValTy = IntegerType::get(ValTy->getContext(), It->second);
+  return ValTy;
+}
+
+InstructionCost VPlanCostModel::getLegacyInstructionCost(Instruction *I,
+                                                         ElementCount VF) {
+  // Only the cost half of the legacy result is forwarded to the caller.
+  VectorizationCostTy Cost = CM.getInstructionCost(I, VF);
+  return Cost.first;
+}
+
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
                                                         ElementCount MaxVF) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
@@ -8677,10 +8706,17 @@
     VF = SubRange.End;
   }
 
+  VPlanCostModel VPCM(*TTI, CM);
   for (const VPlanPtr &Plan : VPlans) {
     SmallVector<InstructionCost> Costs;
     for (ElementCount CostVF : Plan->getVFs()) {
-      auto [VecCost, IsVec] = CM.expectedCost(CostVF, &InvalidCosts);
+      VectorizationCostTy C;
+      // NOTE: the VPlan-based path does not record InvalidCosts.
+      if (CostUsingVPlan)
+        C.first = VPCM.expectedCost(*Plan, CostVF, C.second);
+      else
+        C = CM.expectedCost(CostVF, &InvalidCosts);
+      auto [VecCost, IsVec] = C;
 #ifndef NDEBUG
       unsigned AssumedMinimumVscale = 1;
       if (std::optional<unsigned> VScale = getVScaleForTuning())
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -756,6 +756,15 @@
     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
   }
 
+  /// Returns true if this recipe defines exactly one value and that value's
+  /// underlying IR value is an Instruction, i.e. casting the underlying value
+  /// to Instruction is valid.
+  bool hasUnderlyingInstr() const {
+    return getNumDefinedValues() == 1 &&
+           isa_and_nonnull<Instruction>(
+               getVPSingleValue()->getUnderlyingValue());
+  }
+
   /// Method to support type inquiry through isa, cast, and dyn_cast.
   static inline bool classof(const VPDef *D) {
     // All VPDefs are also VPRecipeBases.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanCostModel.h b/llvm/lib/Transforms/Vectorize/VPlanCostModel.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanCostModel.h
@@ -0,0 +1,83 @@
+//===- VPlanCostModel.h - VPlan-based Vectorizer Cost Model -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// VPlan-based cost model
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANCOSTMODEL_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANCOSTMODEL_H
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Instruction.h"
+
+#include "VPlan.h"
+#include "VPlanValue.h"
+
+namespace llvm {
+class Type;
+class TargetTransformInfo;
+class LoopVectorizationCostModel;
+
+/// Cost model that walks the blocks and recipes of a VPlan. Widened memory
+/// recipes are costed directly through TTI; every other recipe is costed via
+/// the legacy LoopVectorizationCostModel.
+class VPlanCostModel {
+public:
+  explicit VPlanCostModel(const TargetTransformInfo &TTI,
+                          LoopVectorizationCostModel &CM)
+      : TTI(TTI), CM(CM) {}
+
+  /// Return the cost of \p Plan for a given \p VF. \p IsVec is OR-ed with true
+  /// when a costed recipe is found to produce genuinely vector code; it is
+  /// never cleared, so callers must initialize it.
+  InstructionCost expectedCost(const VPlan &Plan, ElementCount VF, bool &IsVec);
+
+private:
+  /// Return the cost of \p Block for a given \p VF. Only VPBasicBlocks
+  /// contribute; any other block kind costs 0.
+  InstructionCost getCost(const VPBlockBase *Block, ElementCount VF,
+                          bool &IsVec);
+
+  /// Return the individual cost of \p Recipe for a given \p VF.
+  InstructionCost getCost(const VPRecipeBase *Recipe, ElementCount VF,
+                          bool &IsVec);
+
+  /// Return the cost the legacy cost model assigns to \p I for a given \p VF.
+  InstructionCost getLegacyInstructionCost(Instruction *I, ElementCount VF);
+
+  /// Return the cost of the widened memory recipe \p VPWMIR for a given \p VF
+  /// and store the vector type that was costed in \p VectorTy.
+  InstructionCost getMemoryOpCost(const VPWidenMemoryInstructionRecipe *VPWMIR,
+                                  ElementCount VF, Type *&VectorTy);
+
+  /// Return the cost of the individual memory operation of an instruction
+  /// \p I of a given type \p Ty.
+  InstructionCost getMemoryOpCost(const Instruction *I, Type *Ty,
+                                  bool IsConsecutive, bool IsMasked,
+                                  bool IsReverse);
+
+  /// Return the integer type of the minimal bitwidth recorded for \p I by the
+  /// legacy cost model, or \p ValTy unchanged if none is recorded.
+  Type *truncateToMinimalBitwidth(Type *ValTy, Instruction *I) const;
+
+  /// Vector target information.
+  const TargetTransformInfo &TTI;
+
+  /// FIXME: Legacy model is only here during our transition to the vplan-based
+  /// model
+  LoopVectorizationCostModel &CM;
+
+  /// Use same cost kind in the cost model
+  const TargetTransformInfo::TargetCostKind CostKind =
+      TargetTransformInfo::TCK_RecipThroughput;
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANCOSTMODEL_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanCostModel.cpp b/llvm/lib/Transforms/Vectorize/VPlanCostModel.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanCostModel.cpp
@@ -0,0 +1,156 @@
+//===- VPlanCostModel.cpp - VPlan-based Vectorizer Cost Model -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// VPlan-based cost model
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+
+#include "VPlan.h"
+#include "VPlanCFG.h"
+#include "VPlanCostModel.h"
+#include "VPlanValue.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "vplan-cost-model"
+
+namespace llvm {
+/// Sum the cost of every block visited by vp_depth_first_deep starting at the
+/// entry of \p Plan.
+InstructionCost VPlanCostModel::expectedCost(const VPlan &Plan, ElementCount VF,
+                                             bool &IsVec) {
+  InstructionCost VectorIterCost = 0;
+  for (const VPBlockBase *Block : vp_depth_first_deep(Plan.getEntry()))
+    VectorIterCost += getCost(Block, VF, IsVec);
+
+  return VectorIterCost;
+}
+
+/// Sum the cost of the recipes of \p Block when it is a VPBasicBlock; any other
+/// kind of block costs 0.
+InstructionCost VPlanCostModel::getCost(const VPBlockBase *Block,
+                                        ElementCount VF, bool &IsVec) {
+  return TypeSwitch<const VPBlockBase *, InstructionCost>(Block)
+      .Case<VPBasicBlock>([&](const VPBasicBlock *BBlock) {
+        InstructionCost Cost = 0;
+        // Cost each recipe. Passing Block here instead of the recipe would
+        // select this very overload again and recurse without end.
+        for (const VPRecipeBase &Recipe : *BBlock)
+          Cost += getCost(&Recipe, VF, IsVec);
+        return Cost;
+      })
+      .Default([](const VPBlockBase *) -> InstructionCost { return 0; });
+}
+
+/// Cost a single recipe: widened memory recipes are costed through TTI, any
+/// other recipe with an underlying instruction is delegated to the legacy
+/// model, and recipes without one are reported and cost 0.
+InstructionCost VPlanCostModel::getCost(const VPRecipeBase *Recipe,
+                                        ElementCount VF, bool &IsVec) {
+  // Only set by the widened-memory case; it stays null for every other recipe.
+  Type *VectorTy = nullptr;
+  InstructionCost Cost =
+      TypeSwitch<const VPRecipeBase *, InstructionCost>(Recipe)
+          .Case<VPWidenMemoryInstructionRecipe>(
+              [&](const VPWidenMemoryInstructionRecipe *VPWMIR) {
+                return getMemoryOpCost(VPWMIR, VF, VectorTy);
+              })
+          .Default([&](const VPRecipeBase *R) -> InstructionCost {
+            if (!R->hasUnderlyingInstr()) {
+              LLVM_DEBUG(
+                  dbgs() << "VPlanCM: unsupported recipe ";
+                  VPSlotTracker SlotTracker((Recipe->getParent())
+                                                ? Recipe->getParent()->getPlan()
+                                                : nullptr);
+                  Recipe->print(dbgs(), Twine(), SlotTracker); dbgs() << '\n');
+              return 0;
+            }
+            Instruction *I = const_cast<Instruction *>(R->getUnderlyingInstr());
+            return getLegacyInstructionCost(I, VF);
+          });
+
+  LLVM_DEBUG(dbgs() << "VPlanCM: cost " << Cost << " for VF " << VF
+                    << " for VPInstruction: ";
+             VPSlotTracker SlotTracker((Recipe->getParent())
+                                           ? Recipe->getParent()->getPlan()
+                                           : nullptr);
+             Recipe->print(dbgs(), Twine(), SlotTracker); dbgs() << '\n');
+  // VectorTy is null unless the recipe was costed as a widened memory
+  // operation, so it must be checked before it is dereferenced. As a
+  // consequence, legacy-costed recipes never contribute to IsVec here.
+  if (VF.isVector() && VectorTy && VectorTy->isVectorTy()) {
+    if (unsigned NumParts = TTI.getNumberOfParts(VectorTy)) {
+      if (VF.isScalable())
+        // <vscale x 1 x iN> is assumed to be profitable over iN because
+        // scalable registers are a distinct register class from scalar ones.
+        // If we ever find a target which wants to lower scalable vectors
+        // back to scalars, we'll need to update this code to explicitly
+        // ask TTI about the register class uses for each part.
+        IsVec |= NumParts <= VF.getKnownMinValue();
+      else
+        IsVec |= NumParts < VF.getKnownMinValue();
+    } else
+      Cost = InstructionCost::getInvalid();
+  }
+  return Cost;
+}
+
+/// Cost a load or store \p I of type \p Ty: consecutive accesses are costed as
+/// a (masked) memory operation plus an optional reverse shuffle, all others as
+/// address computation plus a gather/scatter.
+InstructionCost VPlanCostModel::getMemoryOpCost(const Instruction *I, Type *Ty,
+                                                bool IsConsecutive,
+                                                bool IsMasked, bool IsReverse) {
+  const Align Alignment = getLoadStoreAlignment(const_cast<Instruction *>(I));
+  const Value *Ptr = getLoadStorePointerOperand(I);
+  unsigned AS = getLoadStoreAddressSpace(const_cast<Instruction *>(I));
+  if (IsConsecutive) {
+    InstructionCost Cost = 0;
+    if (IsMasked) {
+      Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), Ty, Alignment, AS,
+                                        CostKind);
+    } else {
+      TargetTransformInfo::OperandValueInfo OpInfo =
+          TargetTransformInfo::getOperandInfo(I->getOperand(0));
+      Cost += TTI.getMemoryOpCost(I->getOpcode(), Ty, Alignment, AS, CostKind,
+                                  OpInfo, I);
+    }
+    if (IsReverse)
+      Cost +=
+          TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
+                             cast<VectorType>(Ty), std::nullopt, CostKind, 0);
+    return Cost;
+  }
+  return TTI.getAddressComputationCost(Ty) +
+         TTI.getGatherScatterOpCost(I->getOpcode(), Ty, Ptr, IsMasked,
+                                    Alignment, CostKind, I);
+}
+
+/// Build the vector type accessed by \p VPWMIR for \p VF, return it through
+/// \p VectorTy and cost the access.
+InstructionCost
+VPlanCostModel::getMemoryOpCost(const VPWidenMemoryInstructionRecipe *VPWMIR,
+                                ElementCount VF, Type *&VectorTy) {
+  Instruction *I = &VPWMIR->getIngredient();
+  Type *ValTy = getLoadStoreType(I);
+  ValTy = truncateToMinimalBitwidth(ValTy, I);
+  const bool IsMasked = VPWMIR->getMask() != nullptr;
+  VectorTy = VectorType::get(ValTy, VF);
+
+  return getMemoryOpCost(I, VectorTy, VPWMIR->isConsecutive(), IsMasked,
+                         VPWMIR->isReverse());
+}
+
+} // namespace llvm