diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -219,29 +219,6 @@ TCK_SizeAndLatency ///< The weighted sum of size and latency. }; - /// Query the cost of a specified instruction. - /// - /// Clients should use this interface to query the cost of an existing - /// instruction. The instruction must have a valid parent (basic block). - /// - /// Note, this method does not cache the cost calculation and it - /// can be expensive in some cases. - InstructionCost getInstructionCost(const Instruction *I, - enum TargetCostKind kind) const { - InstructionCost Cost; - switch (kind) { - case TCK_Latency: - Cost = getInstructionLatency(I); - break; - case TCK_RecipThroughput: - case TCK_CodeSize: - case TCK_SizeAndLatency: - Cost = getUserCost(I, kind); - break; - } - return Cost; - } - /// Underlying constants for 'cost' values in this interface. /// /// Many APIs in this interface return a cost. This enum defines the @@ -320,14 +297,16 @@ /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. - InstructionCost getUserCost(const User *U, ArrayRef Operands, - TargetCostKind CostKind) const; + InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TargetCostKind CostKind) const; - /// This is a helper function which calls the two-argument getUserCost - /// with \p Operands which are the current operands U has. - InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const { + /// This is a helper function which calls the three-argument + /// getInstructionCost with \p Operands which are the current operands U has. + InstructionCost getInstructionCost(const User *U, + TargetCostKind CostKind) const { SmallVector Operands(U->operand_values()); - return getUserCost(U, Operands, CostKind); + return getInstructionCost(U, Operands, CostKind); } /// If a branch or a select condition is skewed in one direction by more than @@ -432,11 +411,11 @@ /// Parameters that control the generic loop unrolling transformation. struct UnrollingPreferences { /// The cost threshold for the unrolled loop. Should be relative to the - /// getUserCost values returned by this API, and the expectation is that - /// the unrolled loop's instructions when run through that interface should - /// not exceed this cost. However, this is only an estimate. Also, specific - /// loops may be unrolled even with a cost above this threshold if deemed - /// profitable. Set this to UINT_MAX to disable the loop body cost + /// getInstructionCost values returned by this API, and the expectation is + /// that the unrolled loop's instructions when run through that interface + /// should not exceed this cost. However, this is only an estimate. Also, + /// specific loops may be unrolled even with a cost above this threshold if + /// deemed profitable. Set this to UINT_MAX to disable the loop body cost /// restriction. unsigned Threshold; /// If complete unrolling will reduce the cost of the loop, we will boost @@ -1519,10 +1498,6 @@ /// @} private: - /// Estimate the latency of specified instruction. - /// Returns 1 as the default value. - InstructionCost getInstructionLatency(const Instruction *I) const; - /// The abstract base class used to type erase specific TTI /// implementations. class Concept; @@ -1549,9 +1524,9 @@ getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) = 0; - virtual InstructionCost getUserCost(const User *U, - ArrayRef Operands, - TargetCostKind CostKind) = 0; + virtual InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TargetCostKind CostKind) = 0; virtual BranchProbability getPredictableBranchThreshold() = 0; virtual bool hasBranchDivergence() = 0; virtual bool useGPUDivergenceAnalysis() = 0; @@ -1866,7 +1841,6 @@ virtual bool supportsScalableVectors() const = 0; virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const = 0; - virtual InstructionCost getInstructionLatency(const Instruction *I) = 0; virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; }; @@ -1901,9 +1875,10 @@ InstructionCost getMemcpyCost(const Instruction *I) override { return Impl.getMemcpyCost(I); } - InstructionCost getUserCost(const User *U, ArrayRef Operands, - TargetCostKind CostKind) override { - return Impl.getUserCost(U, Operands, CostKind); + InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TargetCostKind CostKind) override { + return Impl.getInstructionCost(U, Operands, CostKind); } BranchProbability getPredictableBranchThreshold() override { return Impl.getPredictableBranchThreshold(); @@ -2518,10 +2493,6 @@ return Impl.hasActiveVectorLength(Opcode, DataType, Alignment); } - InstructionCost getInstructionLatency(const Instruction *I) override { - return Impl.getInstructionLatency(I); - } - VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const override { return Impl.getVPLegalizationStrategy(PI); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -500,6 +500,12 @@ // FIXME: Unlikely to be true for CodeSize. return TTI::TCC_Expensive; } + + // Assume a 3cy latency for fp arithmetic ops. + if (CostKind == TTI::TCK_Latency) + if (Ty->getScalarType()->isFloatingPointTy()) + return 3; + return 1; } @@ -993,8 +999,9 @@ return TTI::TCC_Basic; } - InstructionCost getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind) { + InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind) { using namespace llvm::PatternMatch; auto *TargetTTI = static_cast(this); @@ -1097,6 +1104,9 @@ CostKind, I); } case Instruction::Load: { + // FIXME: Arbitary cost which could come from the backend. + if (CostKind == TTI::TCK_Latency) + return 4; auto *LI = cast(U); Type *LoadType = U->getType(); // If there is a non-register sized type, the cost estimation may expand @@ -1248,39 +1258,10 @@ return TargetTTI->getVectorInstrCost(*EEI, DstTy, Idx); } } - // By default, just classify everything as 'basic'. - return TTI::TCC_Basic; - } - InstructionCost getInstructionLatency(const Instruction *I) { - SmallVector Operands(I->operand_values()); - if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free) - return 0; - - if (isa(I)) - return 4; - - Type *DstTy = I->getType(); - - // Usually an intrinsic is a simple instruction. - // A real function call is much slower. - if (auto *CI = dyn_cast(I)) { - const Function *F = CI->getCalledFunction(); - if (!F || static_cast(this)->isLoweredToCall(F)) - return 40; - // Some intrinsics return a value and a flag, we use the value type - // to decide its latency. - if (StructType *StructTy = dyn_cast(DstTy)) - DstTy = StructTy->getElementType(0); - // Fall through to simple instructions. - } - - if (VectorType *VectorTy = dyn_cast(DstTy)) - DstTy = VectorTy->getElementType(); - if (DstTy->isFloatingPointTy()) - return 3; - - return 1; + // By default, just classify everything as 'basic' or -1 to represent that + // don't know the throughput cost. + return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic; } }; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -638,13 +638,6 @@ SimplifyAndSetOp); } - InstructionCost getInstructionLatency(const Instruction *I) { - if (isa(I)) - return getST()->getSchedModel().DefaultLoadLatency; - - return BaseT::getInstructionLatency(I); - } - virtual Optional getCacheSize(TargetTransformInfo::CacheLevel Level) const { return Optional( diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp --- a/llvm/lib/Analysis/CodeMetrics.cpp +++ b/llvm/lib/Analysis/CodeMetrics.cpp @@ -177,7 +177,7 @@ if (InvI->cannotDuplicate()) notDuplicatable = true; - NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize); + NumInsts += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); } if (isa(BB->getTerminator())) diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -1361,8 +1361,8 @@ Operands.push_back(SimpleOp); else Operands.push_back(Op); - return TTI.getUserCost(&GEP, Operands, - TargetTransformInfo::TCK_SizeAndLatency) == + return TTI.getInstructionCost(&GEP, Operands, + TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } @@ -1639,7 +1639,7 @@ if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) SROAArgValues[&I] = SROAArg; - return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == + return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } @@ -1662,7 +1662,7 @@ if (auto *SROAArg = getSROAArgForValueOrNull(Op)) SROAArgValues[&I] = SROAArg; - return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == + return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } @@ -1692,7 +1692,7 @@ break; } - return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == + return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } @@ -2390,7 +2390,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. - if (TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == + if (TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free) return true; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -221,10 +221,10 @@ } InstructionCost -TargetTransformInfo::getUserCost(const User *U, - ArrayRef Operands, - enum TargetCostKind CostKind) const { - InstructionCost Cost = TTIImpl->getUserCost(U, Operands, CostKind); +TargetTransformInfo::getInstructionCost(const User *U, + ArrayRef Operands, + enum TargetCostKind CostKind) const { + InstructionCost Cost = TTIImpl->getInstructionCost(U, Operands, CostKind); assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) && "TTI should not produce negative costs!"); return Cost; @@ -1149,11 +1149,6 @@ return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment); } -InstructionCost -TargetTransformInfo::getInstructionLatency(const Instruction *I) const { - return TTIImpl->getInstructionLatency(I); -} - TargetTransformInfo::Concept::~Concept() = default; TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -6603,8 +6603,8 @@ // If it's safe to speculatively execute, then it should not have side // effects; therefore, it's safe to sink and possibly *not* execute. return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && - TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >= - TargetTransformInfo::TCC_Expensive; + TTI->getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency) >= + TargetTransformInfo::TCC_Expensive; } /// Returns true if a SelectInst should be turned into an explicit branch. diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2340,8 +2340,8 @@ } SmallVector Operands(I.operand_values()); - Cost += - getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency); + Cost += getInstructionCost(&I, Operands, + TargetTransformInfo::TCK_SizeAndLatency); } } diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -165,8 +165,9 @@ /// @} - InstructionCost getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind); + InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind); // Hexagon specific decision to generate a lookup table. bool shouldBuildLookupTables() const; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -340,9 +340,10 @@ return ST.getL1CacheLineSize(); } -InstructionCost HexagonTTIImpl::getUserCost(const User *U, - ArrayRef Operands, - TTI::TargetCostKind CostKind) { +InstructionCost +HexagonTTIImpl::getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind) { auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool { if (!CI->isIntegerCast()) return false; @@ -364,7 +365,7 @@ if (const CastInst *CI = dyn_cast(U)) if (isCastFoldedIntoLoad(CI)) return TargetTransformInfo::TCC_Free; - return BaseT::getUserCost(U, Operands, CostKind); + return BaseT::getInstructionCost(U, Operands, CostKind); } bool HexagonTTIImpl::shouldBuildLookupTables() const { diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -59,8 +59,9 @@ const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - InstructionCost getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind); + InstructionCost getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -321,21 +321,21 @@ (Ty->getPrimitiveSizeInBits() > 128); } -InstructionCost PPCTTIImpl::getUserCost(const User *U, - ArrayRef Operands, - TTI::TargetCostKind CostKind) { +InstructionCost PPCTTIImpl::getInstructionCost(const User *U, + ArrayRef Operands, + TTI::TargetCostKind CostKind) { // We already implement getCastInstrCost and getMemoryOpCost where we perform // the vector adjustment there. if (isa(U) || isa(U) || isa(U)) - return BaseT::getUserCost(U, Operands, CostKind); + return BaseT::getInstructionCost(U, Operands, CostKind); if (U->getType()->isVectorTy()) { // Instructions that need to be split should cost more. std::pair LT = getTypeLegalizationCost(U->getType()); - return LT.first * BaseT::getUserCost(U, Operands, CostKind); + return LT.first * BaseT::getInstructionCost(U, Operands, CostKind); } - return BaseT::getUserCost(U, Operands, CostKind); + return BaseT::getInstructionCost(U, Operands, CostKind); } // Determining the address of a TLS variable results in a function call in diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -492,8 +492,8 @@ } SmallVector Operands(I.operand_values()); - Cost += - getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency); + Cost += getInstructionCost(&I, Operands, + TargetTransformInfo::TCK_SizeAndLatency); } } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2621,6 +2621,11 @@ CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) { + // Assume a 3cy latency for fp select ops. + if (CostKind == TTI::TCK_Latency && Opcode == Instruction::Select) + if (ValTy->getScalarType()->isFloatingPointTy()) + return 3; + // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -573,7 +573,8 @@ if (!I) return std::numeric_limits::min(); - auto Cost = TTI.getUserCost(U, TargetTransformInfo::TCK_SizeAndLatency); + InstructionCost Cost = + TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency); // Traverse recursively if there are more uses. // TODO: Any other instructions to be added here? diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -561,8 +561,8 @@ if (CI->cannotDuplicate() || CI->isConvergent()) return ~0U; - if (TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) - == TargetTransformInfo::TCC_Free) + if (TTI->getInstructionCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) == + TargetTransformInfo::TCC_Free) continue; // All other instructions count for at least one unit. diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1317,13 +1317,13 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI) { InstructionCost CostI = - TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); if (auto *GEP = dyn_cast(&I)) { if (CostI != TargetTransformInfo::TCC_Free) return false; - // For a GEP, we cannot simply use getUserCost because currently it - // optimistically assumes that a GEP will fold into addressing mode + // For a GEP, we cannot simply use getInstructionCost because currently + // it optimistically assumes that a GEP will fold into addressing mode // regardless of its users. const BasicBlock *BB = GEP->getParent(); for (const User *U : GEP->users()) { diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp --- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -552,7 +552,7 @@ m_Specific(FI.InnerTripCount)))) continue; InstructionCost Cost = - TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); LLVM_DEBUG(dbgs() << "Cost " << Cost << ": "; I.dump()); RepeatedInstrCost += Cost; } diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -443,7 +443,7 @@ // First accumulate the cost of this instruction. if (!Cost.IsFree) { - UnrolledCost += TTI.getUserCost(I, CostKind); + UnrolledCost += TTI.getInstructionCost(I, CostKind); LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration " << Iteration << "): "); LLVM_DEBUG(I->dump()); @@ -537,7 +537,7 @@ // Track this instruction's expected baseline cost when executing the // rolled loop form. - RolledDynamicCost += TTI.getUserCost(&I, CostKind); + RolledDynamicCost += TTI.getInstructionCost(&I, CostKind); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns true, mark the instruction as free after diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2878,7 +2878,7 @@ if (CB->isConvergent() || CB->cannotDuplicate()) return false; - Cost += TTI.getUserCost(&I, CostKind); + Cost += TTI.getInstructionCost(&I, CostKind); } assert(Cost >= 0 && "Must not have negative costs!"); LoopCost += Cost; diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp --- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -252,7 +252,7 @@ case Instruction::ShuffleVector: case Instruction::ExtractValue: case Instruction::InsertValue: - return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); + return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency); default: return InstructionCost::getInvalid(); // Disallow anything not explicitly diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -381,7 +381,7 @@ assert((!isa(I) || isSafeToSpeculativelyExecute(cast(I))) && "Instruction is not safe to speculatively execute!"); - return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); + return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency); } /// If we have a merge point of an "if condition" as accepted above, @@ -3626,8 +3626,8 @@ // Account for the cost of duplicating this instruction into each // predecessor. Ignore free instructions. - if (!TTI || - TTI->getUserCost(&I, CostKind) != TargetTransformInfo::TCC_Free) { + if (!TTI || TTI->getInstructionCost(&I, CostKind) != + TargetTransformInfo::TCC_Free) { NumBonusInsts += PredCount; // Early exits once we reach the limit. @@ -3799,7 +3799,8 @@ return false; // Not in white-list - not worthwhile folding. // And finally, if this is a non-free instruction that we are okay // speculating, ensure that we consider the speculation budget. - Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + Cost += + TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); if (Cost > Budget) return false; // Eagerly refuse to fold as soon as we're out of budget. } diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-math.ll b/llvm/test/Analysis/CostModel/AArch64/sve-math.ll --- a/llvm/test/Analysis/CostModel/AArch64/sve-math.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-math.ll @@ -33,7 +33,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret %r ; ; LATE-LABEL: 'sqrt_v2f64' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = call @llvm.sqrt.nxv2f64( %a) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r = call @llvm.sqrt.nxv2f64( %a) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %r ; ; SIZE-LABEL: 'sqrt_v2f64' diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -49,8 +49,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'smax' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'smax' @@ -75,8 +75,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmuladd' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) +; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmuladd' @@ -101,8 +101,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'log2' -; LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %s = call float @llvm.log2.f32(float %a) -; LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) +; LATE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'log2' @@ -127,8 +127,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'constrained_fadd' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; LATE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'constrained_fadd' @@ -153,8 +153,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmaximum' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmaximum' @@ -180,7 +180,7 @@ ; ; LATE-LABEL: 'cttz' ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'cttz' @@ -206,7 +206,7 @@ ; ; LATE-LABEL: 'ctlz' ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'ctlz' @@ -231,8 +231,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fshl' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; LATE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fshl' @@ -256,7 +256,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedgather' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'maskedgather' @@ -277,7 +277,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedscatter' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'maskedscatter' @@ -298,7 +298,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fmax' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmax' @@ -319,7 +319,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'memcpy' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'memcpy' diff --git a/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll b/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll --- a/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/ARM/target-intrinsics.ll @@ -16,7 +16,7 @@ ; ; CHECK-THUMB2-LAT-LABEL: 'intrinsics' ; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef) -; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0f16(half* undef) +; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0f16(half* undef) ; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48) ; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef) ; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void diff --git a/llvm/test/Analysis/CostModel/SystemZ/ext-of-icmp-cost.ll b/llvm/test/Analysis/CostModel/SystemZ/ext-of-icmp-cost.ll --- a/llvm/test/Analysis/CostModel/SystemZ/ext-of-icmp-cost.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/ext-of-icmp-cost.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -passes="print" -cost-kind=code-size 2>&1 -disable-output \ ; RUN: -mtriple=s390x-unknown-linux -mcpu=z13 | FileCheck %s ; -; Check that getUserCost() does not return TCC_Free for extensions of +; Check that getInstructionCost() does not return TCC_Free for extensions of ; i1 returned from icmp. define i64 @fun1(i64 %v) { diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-costkinds.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-costkinds.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fp-costkinds.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-costkinds.ll @@ -1,15 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=latency < %s | FileCheck %s --check-prefixes=LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=code-size < %s | FileCheck %s --check-prefixes=SIZE,SSE-SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=SIZE_LATE,SSE-SIZE_LATE +; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=latency < %s | FileCheck %s --check-prefixes=CHECK,LATE,SSE-LATE +; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=code-size < %s | FileCheck %s --check-prefixes=CHECK,SIZE,SSE-SIZE +; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+sse2 -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=CHECK,SIZE_LATE,SSE-SIZE_LATE ; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=latency < %s | FileCheck %s --check-prefixes=LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=code-size < %s | FileCheck %s --check-prefixes=SIZE,AVX-SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=SIZE_LATE,AVX-SIZE_LATE,AVX1-SIZE_LATE +; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=latency < %s | FileCheck %s --check-prefixes=CHECK,LATE,AVX-LATE,AVX1-LATE +; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=code-size < %s | FileCheck %s --check-prefixes=CHECK,SIZE,AVX-SIZE +; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=CHECK,SIZE_LATE,AVX-SIZE_LATE,AVX1-SIZE_LATE ; -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=latency < %s | FileCheck %s --check-prefixes=LATE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=code-size < %s | FileCheck %s --check-prefixes=SIZE,AVX-SIZE -; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=SIZE_LATE,AVX-SIZE_LATE,AVX2-SIZE_LATE +; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=latency < %s | FileCheck %s --check-prefixes=CHECK,LATE,AVX-LATE,AVX2-LATE +; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=code-size < %s | FileCheck %s --check-prefixes=CHECK,SIZE,AVX-SIZE +; RUN: opt -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -mattr=+avx2 -cost-kind=size-latency < %s | FileCheck %s --check-prefixes=CHECK,SIZE_LATE,AVX-SIZE_LATE,AVX2-SIZE_LATE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -250,38 +250,16 @@ } define i32 @fdiv(i32 %arg) { -; LATE-LABEL: 'fdiv' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fdiv float undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fdiv <4 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = fdiv <8 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = fdiv <16 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = fdiv double undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fdiv <2 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = fdiv <4 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = fdiv <8 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'fdiv' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'fdiv' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; CHECK-LABEL: 'fdiv' +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = fdiv float undef, undef %V4F32 = fdiv <4 x float> undef, undef @@ -297,38 +275,16 @@ } define i32 @frem(i32 %arg) { -; LATE-LABEL: 'frem' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = frem float undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = frem <4 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = frem <8 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = frem <16 x float> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = frem double undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = frem <2 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = frem <4 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = frem <8 x double> undef, undef -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE-LABEL: 'frem' -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = frem <4 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = frem <8 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = frem <16 x float> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = frem <4 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = frem <8 x double> undef, undef -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef -; -; SIZE_LATE-LABEL: 'frem' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = frem <4 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = frem <8 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = frem <16 x float> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = frem <4 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = frem <8 x double> undef, undef -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; CHECK-LABEL: 'frem' +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = frem float undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = frem <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = frem <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = frem <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = frem double undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = frem <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = frem <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = frem float undef, undef %V4F32 = frem <4 x float> undef, undef @@ -344,16 +300,16 @@ } define i32 @fsqrt(i32 %arg) { -; LATE-LABEL: 'fsqrt' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; SSE-LATE-LABEL: 'fsqrt' +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE-SIZE-LABEL: 'fsqrt' ; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) @@ -377,6 +333,17 @@ ; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) ; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; AVX1-LATE-LABEL: 'fsqrt' +; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; AVX1-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; AVX1-SIZE_LATE-LABEL: 'fsqrt' ; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) ; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) @@ -388,6 +355,17 @@ ; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) ; AVX1-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; AVX2-LATE-LABEL: 'fsqrt' +; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; AVX2-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; AVX2-SIZE_LATE-LABEL: 'fsqrt' ; AVX2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) ; AVX2-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) @@ -413,16 +391,16 @@ } define i32 @fabs(i32 %arg) { -; LATE-LABEL: 'fabs' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.fabs.f32(float undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = call double @llvm.fabs.f64(double undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; SSE-LATE-LABEL: 'fabs' +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE-SIZE-LABEL: 'fabs' ; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) @@ -446,6 +424,17 @@ ; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) ; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; AVX-LATE-LABEL: 'fabs' +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.fabs.f64(double undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; AVX-SIZE-LABEL: 'fabs' ; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.fabs.f32(float undef) ; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) @@ -482,16 +471,16 @@ } define i32 @fcopysign(i32 %arg) { -; LATE-LABEL: 'fcopysign' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; SSE-LATE-LABEL: 'fcopysign' +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE-SIZE-LABEL: 'fcopysign' ; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) @@ -515,6 +504,17 @@ ; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) ; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; AVX-LATE-LABEL: 'fcopysign' +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; AVX-SIZE-LABEL: 'fcopysign' ; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) ; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) @@ -551,16 +551,16 @@ } define i32 @fma(i32 %arg) { -; LATE-LABEL: 'fma' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; SSE-LATE-LABEL: 'fma' +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; SSE-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; SSE-SIZE-LABEL: 'fma' ; SSE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) @@ -584,6 +584,17 @@ ; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) ; SSE-SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; AVX-LATE-LABEL: 'fma' +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef) +; AVX-LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; AVX-SIZE-LABEL: 'fma' ; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) ; AVX-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/costmodel.ll b/llvm/test/Analysis/CostModel/X86/costmodel.ll --- a/llvm/test/Analysis/CostModel/X86/costmodel.ll +++ b/llvm/test/Analysis/CostModel/X86/costmodel.ll @@ -19,7 +19,7 @@ ; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %P2I = ptrtoint i8* undef to i64 ; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %TC = trunc i64 undef to i32 ; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) -; LATENCY-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void undef() +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void undef() ; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 undef ; ; CODESIZE-LABEL: 'foo' diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -55,8 +55,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'umul' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'umul' @@ -81,8 +81,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'smax' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'smax' @@ -107,8 +107,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fcopysign' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fcopysign' @@ -133,8 +133,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmuladd' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c) +; LATE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmuladd' @@ -159,8 +159,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'log2' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.log2.f32(float %a) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) +; LATE-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'log2' @@ -185,8 +185,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'constrained_fadd' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'constrained_fadd' @@ -211,8 +211,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmaximum' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fmaximum' @@ -237,8 +237,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'cttz' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'cttz' @@ -263,8 +263,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'ctlz' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) +; LATE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'ctlz' @@ -289,8 +289,8 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fshl' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fshl' @@ -314,7 +314,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedgather' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'maskedgather' @@ -335,7 +335,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedscatter' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'maskedscatter' @@ -356,7 +356,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fmax' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmax' @@ -377,7 +377,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fmul' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul' @@ -398,7 +398,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fadd_fast' -; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_fast' @@ -419,7 +419,7 @@ ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'memcpy' -; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'memcpy'