diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -901,12 +901,15 @@ /// \p Args is an optional argument which holds the instruction operands /// values so the TTI can analyze those values searching for special /// cases or optimizations based on those values. + /// \p CxtI is the optional original context instruction, if one exists, to + /// provide even more information. int getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, OperandValueKind Opd2Info = OK_AnyValue, OperandValueProperties Opd1PropInfo = OP_None, OperandValueProperties Opd2PropInfo = OP_None, - ArrayRef Args = ArrayRef()) const; + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr) const; /// \return The cost of a shuffle instruction of kind Kind and of type Tp. /// The index and subtype parameters are used by the subvector insertion and @@ -1309,12 +1312,11 @@ virtual unsigned getMaxPrefetchIterationsAhead() const = 0; virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; - virtual unsigned - getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, - OperandValueKind Opd2Info, - OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo, - ArrayRef Args) = 0; + virtual unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo, ArrayRef Args, + const Instruction *CxtI = nullptr) = 0; virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) = 0; virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, @@ -1709,14 +1711,15 @@ BlockFrequencyInfo *BFI) override { return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); } - unsigned - getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, - OperandValueKind Opd2Info, - OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo, - ArrayRef Args) override { + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Opd1Info, + OperandValueKind Opd2Info, + OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo, + ArrayRef Args, + const Instruction *CxtI = nullptr) override { return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo, Args); + Opd1PropInfo, Opd2PropInfo, Args, CxtI); } int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -430,7 +430,8 @@ TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, - ArrayRef Args) { + ArrayRef Args, + const Instruction *CxtI = nullptr) { return 1; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -633,7 +633,8 @@ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()) { + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr) { // Check if any of the operands are vector operands. const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -129,7 +129,7 @@ bool isUnaryOp() const { return isUnaryOp(getOpcode()); } bool isBinaryOp() const { return isBinaryOp(getOpcode()); } bool isIntDivRem() const { return isIntDivRem(getOpcode()); } - bool isShift() { return isShift(getOpcode()); } + bool isShift() const { return isShift(getOpcode()); } bool isCast() const { return isCast(getOpcode()); } bool isFuncletPad() const { return isFuncletPad(getOpcode()); } bool isExceptionalTerminator() const { diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -592,10 +592,10 @@ int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo, - ArrayRef Args) const { - int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo, Args); + OperandValueProperties Opd2PropInfo, ArrayRef Args, + const Instruction *CxtI) const { + int Cost = TTIImpl->getArithmeticInstrCost( + Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -1183,7 +1183,7 @@ Op2VK = getOperandInfo(I->getOperand(1), Op2VP); SmallVector Operands(I->operand_values()); return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, - Op1VP, Op2VP, Operands); + Op1VP, Op2VP, Operands, I); } case Instruction::FNeg: { TargetTransformInfo::OperandValueKind Op1VK, Op2VK; @@ -1193,7 +1193,7 @@ Op2VP = OP_None; SmallVector Operands(I->operand_values()); return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, - Op1VP, Op2VP, Operands); + Op1VP, Op2VP, Operands, I); } case Instruction::Select: { const SelectInst *SI = cast(I); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -124,7 +124,8 @@ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()); + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -484,7 +484,8 @@ int AArch64TTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args) { + TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, + const Instruction *CxtI) { // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -172,12 +172,13 @@ bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; int getArithmeticInstrCost( - unsigned Opcode, Type *Ty, - TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, - TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, - TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, - TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()); + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); unsigned getCFInstrCost(unsigned Opcode); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -338,10 +338,13 @@ } } -int GCNTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, - TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args ) { +int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info, + TTI::OperandValueKind Opd2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, + ArrayRef Args, + const Instruction *CxtI) { EVT OrigTy = TLI->getValueType(DL, Ty); if (!OrigTy.isSimple()) { return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, @@ -801,7 +804,7 @@ case Instruction::FNeg: { return getArithmeticInstrCost(I->getOpcode(), I->getType(), TTI::OK_AnyValue, TTI::OK_AnyValue, - TTI::OP_None, TTI::OP_None, Operands); + TTI::OP_None, TTI::OP_None, Operands, I); } default: break; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -187,7 +187,8 @@ TTI::OperandValueKind Op2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()); + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, const Instruction *I = nullptr); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -642,11 +642,13 @@ return BaseCost * BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } -int ARMTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, - TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, - ArrayRef Args) { +int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::OperandValueKind Op1Info, + TTI::OperandValueKind Op2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, + ArrayRef Args, + const Instruction *CxtI) { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); @@ -714,6 +716,33 @@ return Cost; } + // If this operation is a shift on arm/thumb2, it might well be folded into + // the following instruction, hence having a cost of 0. + auto LooksLikeAFreeShift = [&]() { + if (ST->isThumb1Only() || Ty->isVectorTy()) + return false; + + if (!CxtI || !CxtI->hasOneUse() || !CxtI->isShift()) + return false; + if (Op2Info != TargetTransformInfo::OK_UniformConstantValue) + return false; + + // Folded into a ADC/ADD/AND/BIC/CMP/EOR/MVN/ORR/ORN/RSB/SBC/SUB + switch (cast(CxtI->user_back())->getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Xor: + case Instruction::Or: + case Instruction::ICmp: + return true; + default: + return false; + } + }; + if (LooksLikeAFreeShift()) + return 0; + int BaseCost = ST->hasMVEIntegerOps() && Ty->isVectorTy() ? ST->getMVEVectorCostFactor() : 1; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -127,12 +127,14 @@ bool UseMaskForGaps = false); unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I); - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, - TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, - TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, - TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()); + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I = nullptr); unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -236,17 +236,18 @@ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } -unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, - TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args) { +unsigned HexagonTTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, + const Instruction *CxtI) { if (Ty->isVectorTy()) { std::pair LT = TLI.getTypeLegalizationCost(DL, Ty); if (LT.second.isFloatingPoint()) return LT.first + FloatFactor * getTypeNumElements(Ty); } return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo, Args); + Opd1PropInfo, Opd2PropInfo, Args, CxtI); } unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy, diff --git a/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h b/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h --- a/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h +++ b/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h @@ -81,7 +81,8 @@ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()) { + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr) { int ISD = TLI->InstructionOpcodeToISD(Opcode); switch (ISD) { diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -91,7 +91,8 @@ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()); + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -113,7 +113,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args) { + TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, + const Instruction *CxtI) { // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -90,7 +90,8 @@ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()); + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I = nullptr); diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -722,10 +722,13 @@ return Cost * 2; } -int PPCTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, - TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args) { +int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::OperandValueKind Op1Info, + TTI::OperandValueKind Op2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, + ArrayRef Args, + const Instruction *CxtI) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); // Fallback to the default implementation. diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -75,7 +75,8 @@ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()); + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy); unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -348,11 +348,10 @@ } int SystemZTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, - TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, - TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, - ArrayRef Args) { + unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, + TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, + const Instruction *CxtI) { // TODO: return a good value for BB-VECTORIZER that includes the // immediate loads, which we do not want to count for the loop @@ -508,7 +507,7 @@ // Fallback to the default implementation. return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, - Opd1PropInfo, Opd2PropInfo, Args); + Opd1PropInfo, Opd2PropInfo, Args, CxtI); } int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -61,7 +61,8 @@ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()); + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); /// @} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -46,7 +46,8 @@ unsigned WebAssemblyTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args) { + TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, + const Instruction *CxtI) { unsigned Cost = BasicTTIImplBase::getArithmeticInstrCost( Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -125,7 +125,8 @@ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()); + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I = nullptr); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -169,12 +169,13 @@ return 2; } -int X86TTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, - TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, - TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, - ArrayRef Args) { +int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::OperandValueKind Op1Info, + TTI::OperandValueKind Op2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, + ArrayRef Args, + const Instruction *CxtI) { // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6234,7 +6234,7 @@ unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; return N * TTI.getArithmeticInstrCost( I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue, - Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands); + Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I); } case Instruction::FNeg: { unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; @@ -6242,7 +6242,7 @@ I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None, - I->getOperand(0)); + I->getOperand(0), I); } case Instruction::Select: { SelectInst *SI = cast(I); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3420,13 +3420,13 @@ SmallVector Operands(VL0->operand_values()); int ScalarEltCost = TTI->getArithmeticInstrCost( - E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands); + E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } int ScalarCost = VecTy->getNumElements() * ScalarEltCost; - int VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, Op1VK, - Op2VK, Op1VP, Op2VP, Operands); + int VecCost = TTI->getArithmeticInstrCost( + E->getOpcode(), VecTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0); return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::GetElementPtr: { diff --git a/llvm/test/Analysis/CostModel/ARM/freeshift.ll b/llvm/test/Analysis/CostModel/ARM/freeshift.ll --- a/llvm/test/Analysis/CostModel/ARM/freeshift.ll +++ b/llvm/test/Analysis/CostModel/ARM/freeshift.ll @@ -3,17 +3,17 @@ define void @shl(i32 %a, i32 %b) { ; CHECK-LABEL: 'shl' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = shl i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ss = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ss = shl i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xs = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %xs = shl i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ns = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ns = shl i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %os = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %os = shl i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %is = shl i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %is = shl i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -34,17 +34,17 @@ define void @ashr(i32 %a, i32 %b) { ; CHECK-LABEL: 'ashr' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = ashr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ss = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ss = ashr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xs = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %xs = ashr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ns = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ns = ashr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %os = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %os = ashr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %is = ashr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %is = ashr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -65,17 +65,17 @@ define void @lshr(i32 %a, i32 %b) { ; CHECK-LABEL: 'lshr' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %as = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %as = lshr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ss = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ss = lshr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xs = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %xs = lshr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ns = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ns = lshr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %os = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %os = lshr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %is = lshr i32 %a, 3 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %is = lshr i32 %a, 3 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll @@ -5,11 +5,11 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK-LABEL: test -; CHECK-COST: LV: Found an estimated cost of 1 for VF 1 For instruction: %and515 = shl i32 %l41, 3 +; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %and515 = shl i32 %l41, 3 ; CHECK-COST: LV: Found an estimated cost of 1 for VF 1 For instruction: %l45 = and i32 %and515, 131072 ; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction: %and515 = shl i32 %l41, 3 ; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction: %l45 = and i32 %and515, 131072 -; CHECK: vector.body +; CHECK-NOT: vector.body define void @test([101 x i32] *%src, i32 %N) #0 { entry: