diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -219,31 +219,6 @@ TCK_SizeAndLatency ///< The weighted sum of size and latency. }; - /// Query the cost of a specified instruction. - /// - /// Clients should use this interface to query the cost of an existing - /// instruction. The instruction must have a valid parent (basic block). - /// - /// Note, this method does not cache the cost calculation and it - /// can be expensive in some cases. - InstructionCost getInstructionCost(const Instruction *I, - enum TargetCostKind kind) const { - InstructionCost Cost; - switch (kind) { - case TCK_RecipThroughput: - Cost = getInstructionThroughput(I); - break; - case TCK_Latency: - Cost = getInstructionLatency(I); - break; - case TCK_CodeSize: - case TCK_SizeAndLatency: - Cost = getUserCost(I, kind); - break; - } - return Cost; - } - /// Underlying constants for 'cost' values in this interface. /// /// Many APIs in this interface return a cost. This enum defines the @@ -322,14 +297,16 @@ /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. - InstructionCost getUserCost(const User *U, ArrayRef Operands, - TargetCostKind CostKind) const; + InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TargetCostKind CostKind) const; - /// This is a helper function which calls the two-argument getUserCost + /// This is a helper function which calls the two-argument getInstructionCost /// with \p Operands which are the current operands U has. 
- InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const { + InstructionCost getInstructionCost(const User *U, + TargetCostKind CostKind) const { SmallVector Operands(U->operand_values()); - return getUserCost(U, Operands, CostKind); + return getInstructionCost(U, Operands, CostKind); } /// If a branch or a select condition is skewed in one direction by more than @@ -434,11 +411,11 @@ /// Parameters that control the generic loop unrolling transformation. struct UnrollingPreferences { /// The cost threshold for the unrolled loop. Should be relative to the - /// getUserCost values returned by this API, and the expectation is that - /// the unrolled loop's instructions when run through that interface should - /// not exceed this cost. However, this is only an estimate. Also, specific - /// loops may be unrolled even with a cost above this threshold if deemed - /// profitable. Set this to UINT_MAX to disable the loop body cost + /// getInstructionCost values returned by this API, and the expectation is + /// that the unrolled loop's instructions when run through that interface + /// should not exceed this cost. However, this is only an estimate. Also, + /// specific loops may be unrolled even with a cost above this threshold if + /// deemed profitable. Set this to UINT_MAX to disable the loop body cost /// restriction. unsigned Threshold; /// If complete unrolling will reduce the cost of the loop, we will boost @@ -1521,14 +1498,6 @@ /// @} private: - /// Estimate the latency of specified instruction. - /// Returns 1 as the default value. - InstructionCost getInstructionLatency(const Instruction *I) const; - - /// Returns the expected throughput cost of the instruction. - /// Returns -1 if the cost is unknown. - InstructionCost getInstructionThroughput(const Instruction *I) const; - /// The abstract base class used to type erase specific TTI /// implementations. 
class Concept; @@ -1555,9 +1524,9 @@ getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) = 0; - virtual InstructionCost getUserCost(const User *U, - ArrayRef Operands, - TargetCostKind CostKind) = 0; + virtual InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TargetCostKind CostKind) = 0; virtual BranchProbability getPredictableBranchThreshold() = 0; virtual bool hasBranchDivergence() = 0; virtual bool useGPUDivergenceAnalysis() = 0; @@ -1872,7 +1841,6 @@ virtual bool supportsScalableVectors() const = 0; virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const = 0; - virtual InstructionCost getInstructionLatency(const Instruction *I) = 0; virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; }; @@ -1907,9 +1875,10 @@ InstructionCost getMemcpyCost(const Instruction *I) override { return Impl.getMemcpyCost(I); } - InstructionCost getUserCost(const User *U, ArrayRef Operands, - TargetCostKind CostKind) override { - return Impl.getUserCost(U, Operands, CostKind); + InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TargetCostKind CostKind) override { + return Impl.getInstructionCost(U, Operands, CostKind); } BranchProbability getPredictableBranchThreshold() override { return Impl.getPredictableBranchThreshold(); @@ -2524,10 +2493,6 @@ return Impl.hasActiveVectorLength(Opcode, DataType, Alignment); } - InstructionCost getInstructionLatency(const Instruction *I) override { - return Impl.getInstructionLatency(I); - } - VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const override { return Impl.getVPLegalizationStrategy(PI); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -993,8 +993,9 @@ 
return TTI::TCC_Basic; } - InstructionCost getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind) { + InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind) { using namespace llvm::PatternMatch; auto *TargetTTI = static_cast(this); @@ -1097,6 +1098,9 @@ CostKind, I); } case Instruction::Load: { + // FIXME: Arbitrary cost which could come from the backend. + if (CostKind == TTI::TCK_Latency) + return 4; auto *LI = cast(U); Type *LoadType = U->getType(); // If there is a non-register sized type, the cost estimation may expand @@ -1248,39 +1252,10 @@ return TargetTTI->getVectorInstrCost(*EEI, DstTy, Idx); } } - // By default, just classify everything as 'basic'. - return TTI::TCC_Basic; - } - - InstructionCost getInstructionLatency(const Instruction *I) { - SmallVector Operands(I->operand_values()); - if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free) - return 0; - if (isa(I)) - return 4; - - Type *DstTy = I->getType(); - - // Usually an intrinsic is a simple instruction. - // A real function call is much slower. - if (auto *CI = dyn_cast(I)) { - const Function *F = CI->getCalledFunction(); - if (!F || static_cast(this)->isLoweredToCall(F)) - return 40; - // Some intrinsics return a value and a flag, we use the value type - // to decide its latency. - if (StructType *StructTy = dyn_cast(DstTy)) - DstTy = StructTy->getElementType(0); - // Fall through to simple instructions. - } - - if (VectorType *VectorTy = dyn_cast(DstTy)) - DstTy = VectorTy->getElementType(); - if (DstTy->isFloatingPointTy()) - return 3; - - return 1; + // By default, just classify everything as 'basic' or -1 to represent that + // we don't know the throughput cost. + return CostKind == TTI::TCK_RecipThroughput ? 
-1 : TTI::TCC_Basic; } }; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -636,13 +636,6 @@ SimplifyAndSetOp); } - InstructionCost getInstructionLatency(const Instruction *I) { - if (isa(I)) - return getST()->getSchedModel().DefaultLoadLatency; - - return BaseT::getInstructionLatency(I); - } - virtual Optional getCacheSize(TargetTransformInfo::CacheLevel Level) const { return Optional( diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp --- a/llvm/lib/Analysis/CodeMetrics.cpp +++ b/llvm/lib/Analysis/CodeMetrics.cpp @@ -177,7 +177,7 @@ if (InvI->cannotDuplicate()) notDuplicatable = true; - NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize); + NumInsts += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); } if (isa(BB->getTerminator())) diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp --- a/llvm/lib/Analysis/CostModel.cpp +++ b/llvm/lib/Analysis/CostModel.cpp @@ -100,8 +100,8 @@ for (BasicBlock &B : *F) { for (Instruction &Inst : B) { InstructionCost Cost; - if (TypeBasedIntrinsicCost && isa(&Inst)) { - auto *II = dyn_cast(&Inst); + auto *II = dyn_cast(&Inst); + if (II && TypeBasedIntrinsicCost) { IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II, InstructionCost::getInvalid(), true); Cost = TTI->getIntrinsicInstrCost(ICA, CostKind); @@ -128,8 +128,8 @@ // TODO: Use a pass parameter instead of cl::opt CostKind to determine // which cost kind to print. 
InstructionCost Cost; - if (TypeBasedIntrinsicCost && isa(&Inst)) { - auto *II = dyn_cast(&Inst); + auto *II = dyn_cast(&Inst); + if (II && TypeBasedIntrinsicCost) { IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II, InstructionCost::getInvalid(), true); Cost = TTI.getIntrinsicInstrCost(ICA, CostKind); diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -1352,8 +1352,8 @@ Operands.push_back(SimpleOp); else Operands.push_back(Op); - return TTI.getUserCost(&GEP, Operands, - TargetTransformInfo::TCK_SizeAndLatency) == + return TTI.getInstructionCost(&GEP, Operands, + TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } @@ -1630,7 +1630,7 @@ if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) SROAArgValues[&I] = SROAArg; - return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == + return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } @@ -1653,7 +1653,7 @@ if (auto *SROAArg = getSROAArgForValueOrNull(Op)) SROAArgValues[&I] = SROAArg; - return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == + return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } @@ -1683,7 +1683,7 @@ break; } - return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == + return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } @@ -2381,7 +2381,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. 
- if (TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == + if (TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free) return true; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -221,10 +221,10 @@ } InstructionCost -TargetTransformInfo::getUserCost(const User *U, - ArrayRef Operands, - enum TargetCostKind CostKind) const { - InstructionCost Cost = TTIImpl->getUserCost(U, Operands, CostKind); +TargetTransformInfo::getInstructionCost(const User *U, + ArrayRef Operands, + enum TargetCostKind CostKind) const { + InstructionCost Cost = TTIImpl->getInstructionCost(U, Operands, CostKind); assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) && "TTI should not produce negative costs!"); return Cost; @@ -1149,70 +1149,6 @@ return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment); } -InstructionCost -TargetTransformInfo::getInstructionLatency(const Instruction *I) const { - return TTIImpl->getInstructionLatency(I); -} - -InstructionCost -TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; - - switch (I->getOpcode()) { - case Instruction::GetElementPtr: - case Instruction::Ret: - case Instruction::PHI: - case Instruction::Br: - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::FNeg: - case Instruction::Select: - case Instruction::ICmp: - case 
Instruction::FCmp: - case Instruction::Store: - case Instruction::Load: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: - case Instruction::AddrSpaceCast: - case Instruction::ExtractElement: - case Instruction::InsertElement: - case Instruction::ExtractValue: - case Instruction::ShuffleVector: - case Instruction::Call: - case Instruction::Switch: - return getUserCost(I, CostKind); - default: - // We don't have any information on this instruction. - return -1; - } -} - TargetTransformInfo::Concept::~Concept() = default; TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -6603,8 +6603,8 @@ // If it's safe to speculatively execute, then it should not have side // effects; therefore, it's safe to sink and possibly *not* execute. return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && - TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >= - TargetTransformInfo::TCC_Expensive; + TTI->getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency) >= + TargetTransformInfo::TCC_Expensive; } /// Returns true if a SelectInst should be turned into an explicit branch. 
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2344,8 +2344,8 @@ } SmallVector Operands(I.operand_values()); - Cost += - getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency); + Cost += getInstructionCost(&I, Operands, + TargetTransformInfo::TCK_SizeAndLatency); } } diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -165,8 +165,9 @@ /// @} - InstructionCost getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind); + InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind); // Hexagon specific decision to generate a lookup table. bool shouldBuildLookupTables() const; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -342,9 +342,10 @@ return ST.getL1CacheLineSize(); } -InstructionCost HexagonTTIImpl::getUserCost(const User *U, - ArrayRef Operands, - TTI::TargetCostKind CostKind) { +InstructionCost +HexagonTTIImpl::getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind) { auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool { if (!CI->isIntegerCast()) return false; @@ -366,7 +367,7 @@ if (const CastInst *CI = dyn_cast(U)) if (isCastFoldedIntoLoad(CI)) return TargetTransformInfo::TCC_Free; - return BaseT::getUserCost(U, Operands, CostKind); + return BaseT::getInstructionCost(U, Operands, CostKind); } bool HexagonTTIImpl::shouldBuildLookupTables() const { diff --git 
a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -59,8 +59,9 @@ const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - InstructionCost getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind); + InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -321,22 +321,22 @@ (Ty->getPrimitiveSizeInBits() > 128); } -InstructionCost PPCTTIImpl::getUserCost(const User *U, - ArrayRef Operands, - TTI::TargetCostKind CostKind) { +InstructionCost PPCTTIImpl::getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind) { // We already implement getCastInstrCost and getMemoryOpCost where we perform // the vector adjustment there. if (isa(U) || isa(U) || isa(U)) - return BaseT::getUserCost(U, Operands, CostKind); + return BaseT::getInstructionCost(U, Operands, CostKind); if (U->getType()->isVectorTy()) { // Instructions that need to be split should cost more. 
std::pair LT = TLI->getTypeLegalizationCost(DL, U->getType()); - return LT.first * BaseT::getUserCost(U, Operands, CostKind); + return LT.first * BaseT::getInstructionCost(U, Operands, CostKind); } - return BaseT::getUserCost(U, Operands, CostKind); + return BaseT::getInstructionCost(U, Operands, CostKind); } // Determining the address of a TLS variable results in a function call in diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -492,8 +492,8 @@ } SmallVector Operands(I.operand_values()); - Cost += - getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency); + Cost += getInstructionCost(&I, Operands, + TargetTransformInfo::TCK_SizeAndLatency); } } diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -573,7 +573,8 @@ if (!I) return std::numeric_limits::min(); - auto Cost = TTI.getUserCost(U, TargetTransformInfo::TCK_SizeAndLatency); + auto Cost = + TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency); // Traverse recursively if there are more uses. // TODO: Any other instructions to be added here? diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -561,8 +561,8 @@ if (CI->cannotDuplicate() || CI->isConvergent()) return ~0U; - if (TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) - == TargetTransformInfo::TCC_Free) + if (TTI->getInstructionCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) == + TargetTransformInfo::TCC_Free) continue; // All other instructions count for at least one unit. 
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1316,10 +1316,11 @@ /// Return true if the instruction is free in the loop. static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI) { + InstructionCost CostI = + TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); if (const GetElementPtrInst *GEP = dyn_cast(&I)) { - if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) != - TargetTransformInfo::TCC_Free) + if (CostI != TargetTransformInfo::TCC_Free) return false; // For a GEP, we cannot simply use getUserCost because currently it // optimistically assumes that a GEP will fold into addressing mode @@ -1333,9 +1334,9 @@ return false; } return true; - } else - return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == - TargetTransformInfo::TCC_Free; + } + + return CostI == TargetTransformInfo::TCC_Free; } /// Return true if the only users of this instruction are outside of diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp --- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -552,7 +552,7 @@ m_Specific(FI.InnerTripCount)))) continue; InstructionCost Cost = - TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump()); RepeatedInstrCost += Cost; } diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -443,7 +443,7 @@ // First accumulate the cost of this instruction. 
if (!Cost.IsFree) { - UnrolledCost += TTI.getUserCost(I, CostKind); + UnrolledCost += TTI.getInstructionCost(I, CostKind); LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration << "): "); LLVM_DEBUG(I->dump()); @@ -537,7 +537,7 @@ // Track this instruction's expected baseline cost when executing the // rolled loop form. - RolledDynamicCost += TTI.getUserCost(&I, CostKind); + RolledDynamicCost += TTI.getInstructionCost(&I, CostKind); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns true, mark the instruction as free after diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2878,7 +2878,7 @@ if (CB->isConvergent() || CB->cannotDuplicate()) return false; - Cost += TTI.getUserCost(&I, CostKind); + Cost += TTI.getInstructionCost(&I, CostKind); } assert(Cost >= 0 && "Must not have negative costs!"); LoopCost += Cost; diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp --- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -252,7 +252,7 @@ case Instruction::ShuffleVector: case Instruction::ExtractValue: case Instruction::InsertValue: - return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); + return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency); default: return InstructionCost::getInvalid(); // Disallow anything not explicitly diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -381,7 +381,7 @@ assert((!isa(I) || isSafeToSpeculativelyExecute(cast(I))) && "Instruction is not safe to speculatively execute!"); - return 
TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); + return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency); } /// If we have a merge point of an "if condition" as accepted above, @@ -3625,8 +3625,8 @@ // Account for the cost of duplicating this instruction into each // predecessor. Ignore free instructions. - if (!TTI || - TTI->getUserCost(&I, CostKind) != TargetTransformInfo::TCC_Free) { + if (!TTI || TTI->getInstructionCost(&I, CostKind) != + TargetTransformInfo::TCC_Free) { NumBonusInsts += PredCount; // Early exits once we reach the limit. @@ -3798,7 +3798,8 @@ return false; // Not in white-list - not worthwhile folding. // And finally, if this is a non-free instruction that we are okay // speculating, ensure that we consider the speculation budget. - Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + Cost += + TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); if (Cost > Budget) return false; // Eagerly refuse to fold as soon as we're out of budget. 
} diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-math.ll b/llvm/test/Analysis/CostModel/AArch64/sve-math.ll --- a/llvm/test/Analysis/CostModel/AArch64/sve-math.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-math.ll @@ -12,7 +12,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret %r ; ; LATE-LABEL: 'fadd_v2f64' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = fadd %a, %b +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = fadd %a, %b ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %r ; ; SIZE-LABEL: 'fadd_v2f64' @@ -33,7 +33,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret %r ; ; LATE-LABEL: 'sqrt_v2f64' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = call @llvm.sqrt.nxv2f64( %a) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call @llvm.sqrt.nxv2f64( %a) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %r ; ; SIZE-LABEL: 'sqrt_v2f64' diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -49,8 +49,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'smax' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) ; LATE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'smax' @@ -75,8 +75,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmuladd' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) +; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmuladd' @@ -101,8 +101,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'log2' -; LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %s = call float @llvm.log2.f32(float %a) -; LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) +; LATE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'log2' @@ -127,8 +127,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'constrained_fadd' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; LATE-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; LATE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'constrained_fadd' @@ -153,8 +153,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmaximum' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmaximum' @@ -180,7 +180,7 @@ ; ; LATE-LABEL: 'cttz' ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; 
LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'cttz' @@ -206,7 +206,7 @@ ; ; LATE-LABEL: 'ctlz' ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'ctlz' @@ -231,8 +231,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fshl' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; LATE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fshl' @@ -256,7 +256,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedgather' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret void ; ; SIZE-LABEL: 'maskedgather' @@ -277,7 +277,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedscatter' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'maskedscatter' @@ -298,7 +298,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fmax' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmax' @@ -319,7 +319,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'memcpy' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'memcpy' diff --git a/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll b/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll --- a/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll @@ -16,7 +16,7 @@ ; ; 
CHECK-THUMB2-LAT-LABEL: 'intrinsics' ; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef) -; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0f16(half* undef) +; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0f16(half* undef) ; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48) ; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef) ; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void diff --git a/llvm/test/Analysis/CostModel/SystemZ/ext-of-icmp-cost.ll b/llvm/test/Analysis/CostModel/SystemZ/ext-of-icmp-cost.ll --- a/llvm/test/Analysis/CostModel/SystemZ/ext-of-icmp-cost.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/ext-of-icmp-cost.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -passes="print" -cost-kind=code-size 2>&1 -disable-output \ ; RUN: -mtriple=s390x-unknown-linux -mcpu=z13 | FileCheck %s ; -; Check that getUserCost() does not return TCC_Free for extensions of +; Check that getInstructionCost() does not return TCC_Free for extensions of ; i1 returned from icmp. 
define i64 @fun1(i64 %v) { diff --git a/llvm/test/Analysis/CostModel/X86/costmodel.ll b/llvm/test/Analysis/CostModel/X86/costmodel.ll --- a/llvm/test/Analysis/CostModel/X86/costmodel.ll +++ b/llvm/test/Analysis/CostModel/X86/costmodel.ll @@ -19,7 +19,7 @@ ; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %P2I = ptrtoint i8* undef to i64 ; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %TC = trunc i64 undef to i32 ; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) -; LATENCY-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void undef() +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void undef() ; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 undef ; ; CODESIZE-LABEL: 'foo' diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -52,8 +52,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'umul' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; 
SIZE-LABEL: 'umul' @@ -78,8 +78,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'smax' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'smax' @@ -104,8 +104,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmuladd' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmuladd' @@ -130,8 +130,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'log2' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.log2.f32(float %a) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: 
Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) +; LATE-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'log2' @@ -156,8 +156,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'constrained_fadd' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'constrained_fadd' @@ -182,8 +182,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmaximum' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; LATE-NEXT: Cost 
Model: Found an estimated cost of 196 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmaximum' @@ -208,8 +208,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'cttz' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'cttz' @@ -234,8 +234,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'ctlz' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) +; LATE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'ctlz' @@ -260,8 +260,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fshl' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; LATE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fshl' @@ -285,7 +285,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedgather' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'maskedgather' @@ -306,7 +306,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedscatter' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'maskedscatter' @@ -327,7 +327,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fmax' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> 
%va) +; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmax' @@ -348,7 +348,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fmul' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul' @@ -369,7 +369,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fadd_fast' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_fast' @@ -390,7 +390,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'memcpy' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'memcpy' diff --git a/llvm/test/CodeGen/X86/select-optimize.ll 
b/llvm/test/CodeGen/X86/select-optimize.ll --- a/llvm/test/CodeGen/X86/select-optimize.ll +++ b/llvm/test/CodeGen/X86/select-optimize.ll @@ -246,18 +246,13 @@ ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[X1:%.*]] = phi double [ [[X2:%.*]], [[SELECT_END]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[X1:%.*]] = phi double [ [[X2:%.*]], [[FOR_BODY]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[R:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]] -; CHECK-NEXT: [[X2_FROZEN:%.*]] = freeze i1 [[CMP2]] -; CHECK-NEXT: br i1 [[X2_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END]], !prof [[PROF27:![0-9]+]] -; CHECK: select.true.sink: ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]] -; CHECK-NEXT: br label [[SELECT_END]] -; CHECK: select.end: -; CHECK-NEXT: [[X2]] = phi double [ [[SUB]], [[SELECT_TRUE_SINK]] ], [ [[X1]], [[FOR_BODY]] ] +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]] +; CHECK-NEXT: [[X2]] = select i1 [[CMP2]], double [[SUB]], double [[X1]], !prof [[PROF27:![0-9]+]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]] @@ -451,18 +446,13 @@ ; CHECK-NEXT: [[ADD5:%.*]] = fadd double [[X_ADDR_0_LCSSA]], [[C_0_LCSSA]] ; CHECK-NEXT: ret double [[ADD5]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ] -; CHECK-NEXT: [[X_ADDR_022:%.*]] = phi double [ [[X]], [[FOR_BODY_PREHEADER]] ], [ [[X_ADDR_1]], [[SELECT_END]] ] -; CHECK-NEXT: [[C_020:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[C_1]], [[SELECT_END]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X_ADDR_022:%.*]] = phi double [ [[X]], [[FOR_BODY_PREHEADER]] ], [ [[X_ADDR_1]], [[FOR_BODY]] ] +; CHECK-NEXT: [[C_020:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[C_1]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[X_ADDR_022]], [[TMP0]] -; CHECK-NEXT: [[SUB_FROZEN:%.*]] = freeze i1 [[CMP1]] -; CHECK-NEXT: br i1 [[SUB_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]] -; CHECK: select.false: -; CHECK-NEXT: br label [[SELECT_END]] -; CHECK: select.end: -; CHECK-NEXT: [[SUB:%.*]] = phi double [ [[TMP0]], [[FOR_BODY]] ], [ 0.000000e+00, [[SELECT_FALSE]] ] +; CHECK-NEXT: [[SUB:%.*]] = select i1 [[CMP1]], double [[TMP0]], double 0.000000e+00 ; CHECK-NEXT: [[X_ADDR_1]] = fsub double [[X_ADDR_022]], [[SUB]] ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[K:%.*]], [[N]]