Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -710,11 +710,17 @@ /// and the number of execution units in the CPU. unsigned getMaxInterleaveFactor(unsigned VF) const; - /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc. + /// This is the expected reciprocal throughput in cycles of a math/logic op on + /// a particular pipeline on the target. For example, if an independent + /// integer add can execute every cycle on an ALU for this target, then the + /// cost should be 1. If a division prevents any other division from + /// executing on its unit for 5 cycles, then the cost should be 5. This cost + /// is not affected if the target has N independent pipelines that can execute + /// this kind of operation for greater parallelism. /// \p Args is an optional argument which holds the instruction operands - /// values so the TTI can analyize those values searching for special - /// cases\optimizations based on those values. - int getArithmeticInstrCost( + /// values so the TTI can analyze those values searching for special + /// cases or optimizations based on those values. 
+ int getUnitThroughput( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, OperandValueKind Opd2Info = OK_AnyValue, OperandValueProperties Opd1PropInfo = OP_None, @@ -1042,11 +1048,11 @@ virtual unsigned getMaxPrefetchIterationsAhead() = 0; virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; virtual unsigned - getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, - OperandValueKind Opd2Info, - OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo, - ArrayRef<const Value *> Args) = 0; + getUnitThroughput(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + OperandValueKind Opd2Info, + OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args) = 0; virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) = 0; virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, @@ -1351,13 +1357,13 @@ return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize); } unsigned - getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, - OperandValueKind Opd2Info, - OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo, - ArrayRef<const Value *> Args) override { - return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo, Args); + getUnitThroughput(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + OperandValueKind Opd2Info, + OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args) override { + return Impl.getUnitThroughput(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo, Args); } int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) override { Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -387,12 +387,12 @@ unsigned getMaxInterleaveFactor(unsigned VF) { 
return 1; } - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - TTI::OperandValueKind Opd1Info, - TTI::OperandValueKind Opd2Info, - TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, - ArrayRef<const Value *> Args) { + unsigned getUnitThroughput(unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info, + TTI::OperandValueKind Opd2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, + ArrayRef<const Value *> Args) { return 1; } Index: include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- include/llvm/CodeGen/BasicTTIImpl.h +++ include/llvm/CodeGen/BasicTTIImpl.h @@ -474,7 +474,7 @@ unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } - unsigned getArithmeticInstrCost( + unsigned getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, @@ -511,7 +511,7 @@ if (Ty->isVectorTy()) { unsigned Num = Ty->getVectorNumElements(); unsigned Cost = static_cast<T *>(this) - ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); + ->getUnitThroughput(Opcode, Ty->getScalarType()); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. return getScalarizationOverhead(Ty, Args) + Num * Cost; @@ -1086,9 +1086,9 @@ // point mul followed by an add. if (IID == Intrinsic::fmuladd) return static_cast<T *>(this) - ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + + ->getUnitThroughput(BinaryOperator::FMul, RetTy) + static_cast<T *>(this) - ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); + ->getUnitThroughput(BinaryOperator::FAdd, RetTy); // Else, assume that we need to scalarize this intrinsic. For math builtins // this will emit a costly libcall, adding call overhead and spills. 
Make it @@ -1203,7 +1203,7 @@ ShuffleCost += (IsPairwise + 1) * ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, Ty); - ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); + ArithCost += ConcreteTTI->getUnitThroughput(Opcode, Ty); Ty = VectorType::get(ScalarTy, NumVecElts); ++LongVectorCount; } @@ -1215,7 +1215,7 @@ ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, Ty); ArithCost += (NumReduxLevels - LongVectorCount) * - ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); + ConcreteTTI->getUnitThroughput(Opcode, Ty); return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); } Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -370,13 +370,13 @@ return TTIImpl->getMaxInterleaveFactor(VF); } -int TargetTransformInfo::getArithmeticInstrCost( +int TargetTransformInfo::getUnitThroughput( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef Args) const { - int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo, Args); + int Cost = TTIImpl->getUnitThroughput(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo, Args); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -1007,10 +1007,9 @@ TargetTransformInfo::OperandValueKind Op2VK = getOperandInfo(I->getOperand(1)); SmallVector Operands(I->operand_values()); - return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, - Op2VK, TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None, - Operands); + return getUnitThroughput(I->getOpcode(), I->getType(), Op1VK, Op2VK, + TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None, Operands); } case Instruction::Select: { const SelectInst *SI = cast(I); Index: 
lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -5650,10 +5650,10 @@ TargetTransformInfo::OperandValueKind Arg1OVK = !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue : TargetTransformInfo::OK_AnyValue; - ScalarCost += TTI.getArithmeticInstrCost( - Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK); - VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, - Arg0OVK, Arg1OVK); + ScalarCost += TTI.getUnitThroughput(Inst->getOpcode(), Inst->getType(), + Arg0OVK, Arg1OVK); + VectorCost += TTI.getUnitThroughput(Inst->getOpcode(), PromotedType, + Arg0OVK, Arg1OVK); } DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: " << ScalarCost << "\nVector: " << VectorCost << '\n'); Index: lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- lib/Target/AArch64/AArch64TargetTransformInfo.h +++ lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -118,7 +118,7 @@ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - int getArithmeticInstrCost( + int getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -474,7 +474,7 @@ return ST->getVectorInsertExtractBaseCost(); } -int AArch64TTIImpl::getArithmeticInstrCost( +int AArch64TTIImpl::getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args) { @@ -500,25 +500,25 @@ // normally expanded to the sequence ADD + CMP 
+ SELECT + SRA. // The OperandValue properties many not be same as that of previous // operation; conservatively assume OP_None. - Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); + Cost += getUnitThroughput(Instruction::Add, Ty, Opd1Info, Opd2Info, + TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None); + Cost += getUnitThroughput(Instruction::Sub, Ty, Opd1Info, Opd2Info, + TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None); + Cost += getUnitThroughput(Instruction::Select, Ty, Opd1Info, Opd2Info, + TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None); + Cost += getUnitThroughput(Instruction::AShr, Ty, Opd1Info, Opd2Info, + TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None); return Cost; } switch (ISD) { default: - return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + return Cost + BaseT::getUnitThroughput(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); case ISD::ADD: case ISD::MUL: case ISD::XOR: Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -134,7 +134,7 @@ bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; - int getArithmeticInstrCost( + int getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, 
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -318,14 +318,14 @@ } } -int AMDGPUTTIImpl::getArithmeticInstrCost( +int AMDGPUTTIImpl::getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args ) { EVT OrigTy = TLI->getValueType(DL, Ty); if (!OrigTy.isSimple()) { - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + return BaseT::getUnitThroughput(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); } // Legalize the type. @@ -424,8 +424,8 @@ break; } - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + return BaseT::getUnitThroughput(Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, + Opd2PropInfo); } unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) { Index: lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.h +++ lib/Target/ARM/ARMTargetTransformInfo.h @@ -156,7 +156,7 @@ int getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr); - int getArithmeticInstrCost( + int getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info = TTI::OK_AnyValue, TTI::OperandValueKind Op2Info = TTI::OK_AnyValue, Index: lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.cpp +++ lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -453,7 +453,7 @@ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } -int ARMTTIImpl::getArithmeticInstrCost( +int 
ARMTTIImpl::getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, @@ -508,8 +508,8 @@ if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second)) return LT.first * Entry->Cost; - int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, - Opd1PropInfo, Opd2PropInfo); + int Cost = BaseT::getUnitThroughput(Opcode, Ty, Op1Info, Op2Info, + Opd1PropInfo, Opd2PropInfo); // This is somewhat of a hack. The problem that we are facing is that SROA // creates a sequence of shift, and, or instructions to construct values. Index: lib/Target/Lanai/LanaiTargetTransformInfo.h =================================================================== --- lib/Target/Lanai/LanaiTargetTransformInfo.h +++ lib/Target/Lanai/LanaiTargetTransformInfo.h @@ -76,7 +76,7 @@ return getIntImmCost(Imm, Ty); } - unsigned getArithmeticInstrCost( + unsigned getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, @@ -87,8 +87,8 @@ switch (ISD) { default: - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + return BaseT::getUnitThroughput(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); case ISD::MUL: case ISD::SDIV: case ISD::UDIV: @@ -98,8 +98,8 @@ // instruction cost was arbitrarily chosen to reduce the desirability // of emitting arithmetic instructions that are emulated in software. // TODO: Investigate the performance impact given specialized lowerings. 
- return 64 * BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + return 64 * BaseT::getUnitThroughput(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); } } }; Index: lib/Target/NVPTX/NVPTXTargetTransformInfo.h =================================================================== --- lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -53,7 +53,7 @@ // calls are particularly expensive in NVPTX. unsigned getInliningThresholdMultiplier() { return 5; } - int getArithmeticInstrCost( + int getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, Index: lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp =================================================================== --- lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -112,7 +112,7 @@ return false; } -int NVPTXTTIImpl::getArithmeticInstrCost( +int NVPTXTTIImpl::getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) { @@ -123,8 +123,8 @@ switch (ISD) { default: - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + return BaseT::getUnitThroughput(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); case ISD::ADD: case ISD::MUL: case ISD::XOR: @@ -136,8 +136,8 @@ if (LT.second.SimpleTy == MVT::i64) return 2 * LT.first; // Delegate other cases to the basic TTI. 
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + return BaseT::getUnitThroughput(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); } } Index: lib/Target/PowerPC/PPCTargetTransformInfo.h =================================================================== --- lib/Target/PowerPC/PPCTargetTransformInfo.h +++ lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -71,7 +71,7 @@ unsigned getCacheLineSize(); unsigned getPrefetchDistance(); unsigned getMaxInterleaveFactor(unsigned VF); - int getArithmeticInstrCost( + int getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, Index: lib/Target/PowerPC/PPCTargetTransformInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -324,15 +324,15 @@ return 2; } -int PPCTTIImpl::getArithmeticInstrCost( +int PPCTTIImpl::getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); // Fallback to the default implementation. 
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, - Opd1PropInfo, Opd2PropInfo); + return BaseT::getUnitThroughput(Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, + Opd2PropInfo); } int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Index: lib/Target/SystemZ/SystemZTargetTransformInfo.h =================================================================== --- lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -68,7 +68,7 @@ bool supportsEfficientVectorElementLoadStore() { return true; } bool enableInterleavedAccessVectorization() { return true; } - int getArithmeticInstrCost( + int getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, Index: lib/Target/SystemZ/SystemZTargetTransformInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -328,7 +328,7 @@ return (VT.isScalarInteger() && TLI->isTypeLegal(VT)); } -int SystemZTTIImpl::getArithmeticInstrCost( +int SystemZTTIImpl::getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -368,7 +368,7 @@ } if (Ty->isVectorTy()) { - assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type."); + assert (ST->hasVector() && "getUnitThroughput() called with vector type."); unsigned VF = Ty->getVectorNumElements(); unsigned NumVectors = getNumberOfParts(Ty); @@ -395,7 +395,7 @@ return NumVectors; // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. 
- unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); + unsigned ScalarCost = getUnitThroughput(Opcode, Ty->getScalarType()); unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args); // FIXME: VF 2 for these FP operations are currently just as // expensive as for VF 4. @@ -459,8 +459,8 @@ } // Fallback to the default implementation. - return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, - Opd1PropInfo, Opd2PropInfo, Args); + return BaseT::getUnitThroughput(Opcode, Ty, Op1Info, Op2Info, + Opd1PropInfo, Opd2PropInfo, Args); } Index: lib/Target/X86/X86TargetTransformInfo.h =================================================================== --- lib/Target/X86/X86TargetTransformInfo.h +++ lib/Target/X86/X86TargetTransformInfo.h @@ -62,7 +62,7 @@ unsigned getRegisterBitWidth(bool Vector) const; unsigned getLoadStoreVecRegBitWidth(unsigned AS) const; unsigned getMaxInterleaveFactor(unsigned VF); - int getArithmeticInstrCost( + int getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, Index: lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- lib/Target/X86/X86TargetTransformInfo.cpp +++ lib/Target/X86/X86TargetTransformInfo.cpp @@ -169,7 +169,7 @@ return 2; } -int X86TTIImpl::getArithmeticInstrCost( +int X86TTIImpl::getUnitThroughput( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -238,15 +238,15 @@ // normally expanded to the sequence SRA + SRL + ADD + SRA. // The OperandValue properties many not be same as that of previous // operation;conservatively assume OP_None. 
- int Cost = 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, + int Cost = 2 * getUnitThroughput(Instruction::AShr, Ty, Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); + Cost += getUnitThroughput(Instruction::LShr, Ty, Op1Info, Op2Info, + TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None); + Cost += getUnitThroughput(Instruction::Add, Ty, Op1Info, Op2Info, + TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None); return Cost; } @@ -737,7 +737,7 @@ return LT.first * Entry->Cost; // Fallback to the default implementation. - return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); + return BaseT::getUnitThroughput(Opcode, Ty, Op1Info, Op2Info); } int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -881,9 +881,9 @@ // could compute more comprehensively the cost of all instructions on the // induction variable when necessary. 
if (TTI && - TTI->getArithmeticInstrCost(Instruction::Add, Ty) > - TTI->getArithmeticInstrCost(Instruction::Add, - Cast->getOperand(0)->getType())) { + TTI->getUnitThroughput(Instruction::Add, Ty) > + TTI->getUnitThroughput(Instruction::Add, + Cast->getOperand(0)->getType())) { return; } Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7211,7 +7211,7 @@ Cost += VF * TTI.getCFInstrCost(Instruction::PHI); // The cost of the non-predicated instruction. - Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy); + Cost += VF * TTI.getUnitThroughput(I->getOpcode(), RetTy); // The cost of insertelement and extractelement instructions needed for // scalarization. @@ -7272,8 +7272,8 @@ } SmallVector Operands(I->operand_values()); unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; - return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, - Op2VK, Op1VP, Op2VP, Operands); + return N * TTI.getUnitThroughput(I->getOpcode(), VectorTy, Op1VK, Op2VK, + Op1VP, Op2VP, Operands); } case Instruction::Select: { SelectInst *SI = cast(I); @@ -7364,7 +7364,7 @@ default: // The cost of executing VF copies of the scalar instruction. This opcode // is unknown. Assume that it is the same as 'mul'. - return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) + + return VF * TTI.getUnitThroughput(Instruction::Mul, VectorTy) + getScalarizationOverhead(I, VF, TTI); } // end of switch. 
} Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2204,15 +2204,15 @@ if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * - TTI->getArithmeticInstrCost(S.Opcode, ScalarTy, Op1VK, Op2VK, Op1VP, - Op2VP, Operands); + TTI->getUnitThroughput(S.Opcode, ScalarTy, Op1VK, Op2VK, Op1VP, + Op2VP, Operands); } int ScalarCost = VecTy->getNumElements() * - TTI->getArithmeticInstrCost(S.Opcode, ScalarTy, Op1VK, Op2VK, Op1VP, - Op2VP, Operands); - int VecCost = TTI->getArithmeticInstrCost(S.Opcode, VecTy, Op1VK, Op2VK, - Op1VP, Op2VP, Operands); + TTI->getUnitThroughput(S.Opcode, ScalarTy, Op1VK, Op2VK, Op1VP, + Op2VP, Operands); + int VecCost = TTI->getUnitThroughput(S.Opcode, VecTy, Op1VK, Op2VK, + Op1VP, Op2VP, Operands); return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::GetElementPtr: { @@ -2223,14 +2223,14 @@ if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * - TTI->getArithmeticInstrCost(Instruction::Add, - ScalarTy, Op1VK, Op2VK); + TTI->getUnitThroughput(Instruction::Add, ScalarTy, + Op1VK, Op2VK); } int ScalarCost = VecTy->getNumElements() * - TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK); + TTI->getUnitThroughput(Instruction::Add, ScalarTy, Op1VK, Op2VK); int VecCost = - TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK); + TTI->getUnitThroughput(Instruction::Add, VecTy, Op1VK, Op2VK); return ReuseShuffleCost + VecCost - ScalarCost; } @@ -2308,15 +2308,15 @@ Instruction *I = cast(VL[Idx]); if (!I) continue; - ReuseShuffleCost -= TTI->getArithmeticInstrCost( - I->getOpcode(), ScalarTy, Op1VK, Op2VK); + ReuseShuffleCost -= TTI->getUnitThroughput(I->getOpcode(), ScalarTy, + Op1VK, Op2VK); } for (Value *V : VL) { Instruction *I = cast(V); if (!I) continue; - ReuseShuffleCost += 
TTI->getArithmeticInstrCost( - I->getOpcode(), ScalarTy, Op1VK, Op2VK); + ReuseShuffleCost += TTI->getUnitThroughput(I->getOpcode(), ScalarTy, + Op1VK, Op2VK); } } int VecCost = 0; @@ -2324,17 +2324,15 @@ Instruction *I = cast(i); if (!I) break; - ScalarCost += - TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK); + ScalarCost += TTI->getUnitThroughput(I->getOpcode(), ScalarTy, Op1VK, + Op2VK); } // VecCost is equal to sum of the cost of creating 2 vectors // and the cost of creating shuffle. Instruction *I0 = cast(VL[0]); - VecCost = - TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK); + VecCost = TTI->getUnitThroughput(I0->getOpcode(), VecTy, Op1VK, Op2VK); Instruction *I1 = cast(VL[1]); - VecCost += - TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK); + VecCost += TTI->getUnitThroughput(I1->getOpcode(), VecTy, Op1VK, Op2VK); VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0); return ReuseShuffleCost + VecCost - ScalarCost; @@ -5692,7 +5690,7 @@ switch (ReductionData.getKind()) { case RK_Arithmetic: ScalarReduxCost = - TTI->getArithmeticInstrCost(ReductionData.getOpcode(), ScalarTy); + TTI->getUnitThroughput(ReductionData.getOpcode(), ScalarTy); break; case RK_Min: case RK_Max: