Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -411,6 +411,11 @@
   /// containing this constant value for the target.
   bool shouldBuildLookupTablesForConstant(Constant *C) const;
 
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) const;
+
   /// \brief Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
 
@@ -744,6 +749,10 @@
   virtual unsigned getJumpBufSize() = 0;
   virtual bool shouldBuildLookupTables() = 0;
   virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
+  virtual unsigned
+  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
+  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                                    unsigned VF) = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -934,6 +943,14 @@
   bool shouldBuildLookupTablesForConstant(Constant *C) override {
     return Impl.shouldBuildLookupTablesForConstant(C);
   }
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) override {
+    return Impl.getScalarizationOverhead(Ty, Insert, Extract);
+  }
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) override {
+    return Impl.getOperandsScalarizationOverhead(Args, VF);
+  }
+
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -251,6 +251,13 @@
   bool shouldBuildLookupTables() { return true; }
   bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
 
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+    return 0;
+  }
+
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) { return 0; }
+
   bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
 
   bool enableInterleavedAccessVectorization() { return false; }
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -42,24 +42,6 @@
   typedef TargetTransformInfoImplCRTPBase<T> BaseT;
   typedef TargetTransformInfo TTI;
 
-  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
-  /// are set if the result needs to be inserted and/or extracted from vectors.
-  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
-    assert(Ty->isVectorTy() && "Can only scalarize vectors");
-    unsigned Cost = 0;
-
-    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
-      if (Insert)
-        Cost += static_cast<T *>(this)
-                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
-      if (Extract)
-        Cost += static_cast<T *>(this)
-                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
-    }
-
-    return Cost;
-  }
-
   /// Estimate a cost of shuffle as a sequence of extract and insert
   /// operations.
   unsigned getPermuteShuffleOverhead(Type *Ty) {
@@ -301,6 +283,38 @@
 
   unsigned getRegisterBitWidth(bool Vector) { return 32; }
 
+  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+  /// are set if the result needs to be inserted and/or extracted from vectors.
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+    assert(Ty->isVectorTy() && "Can only scalarize vectors");
+    unsigned Cost = 0;
+
+    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+      if (Insert)
+        Cost += static_cast<T *>(this)
+                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+      if (Extract)
+        Cost += static_cast<T *>(this)
+                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+    }
+
+    return Cost;
+  }
+
+  /// Estimate the overhead of scalarizing an instruction's unique operands.
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                            unsigned VF) {
+    assert(!Args.empty() && "Should only be called with existing arguments");
+    unsigned Cost = 0;
+    SmallPtrSet<const Value *, 4> UniqueOperands;
+    for (const Value *A : Args) {
+      if (UniqueOperands.insert(A).second)
+        Cost += getScalarizationOverhead(VectorType::get(A->getType(), VF),
+                                         false, true);
+    }
+    return Cost;
+  }
+
   unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
 
   unsigned getArithmeticInstrCost(
@@ -344,7 +358,13 @@
       // return the cost of multiple scalar invocation plus the cost of
       // inserting
      // and extracting the values.
-      return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+
+      unsigned TotCost = getScalarizationOverhead(Ty, true, false) + Num * Cost;
+      if (!Args.empty())
+        TotCost += getOperandsScalarizationOverhead(Args, Num);
+      else
+        TotCost += getScalarizationOverhead(Ty, false, true);
+      return TotCost;
     }
 
     // We don't know anything about this scalar instruction.
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -182,6 +182,17 @@
   return TTIImpl->shouldBuildLookupTablesForConstant(C);
 }
 
+unsigned TargetTransformInfo::
+getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
+  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
+}
+
+unsigned TargetTransformInfo::
+getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                 unsigned VF) const {
+  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
+}
+
 bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
 }
Index: lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -34,10 +34,6 @@
   const AArch64Subtarget *ST;
   const AArch64TargetLowering *TLI;
 
-  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
-  /// are set if the result needs to be inserted and/or extracted from vectors.
-  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
   const AArch64Subtarget *getST() const { return ST; }
   const AArch64TargetLowering *getTLI() const { return TLI; }
 
Index: lib/Target/ARM/ARMTargetTransformInfo.h
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.h
+++ lib/Target/ARM/ARMTargetTransformInfo.h
@@ -33,10 +33,6 @@
   const ARMSubtarget *ST;
   const ARMTargetLowering *TLI;
 
-  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
-  /// are set if the result needs to be inserted and/or extracted from vectors.
-  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
   const ARMSubtarget *getST() const { return ST; }
   const ARMTargetLowering *getTLI() const { return TLI; }
 
Index: lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.h
+++ lib/Target/X86/X86TargetTransformInfo.h
@@ -33,8 +33,6 @@
   const X86Subtarget *ST;
   const X86TargetLowering *TLI;
 
-  int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
   const X86Subtarget *getST() const { return ST; }
   const X86TargetLowering *getTLI() const { return TLI; }
 
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1579,20 +1579,6 @@
   return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
 }
 
-int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
-  assert (Ty->isVectorTy() && "Can only scalarize vectors");
-  int Cost = 0;
-
-  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
-    if (Insert)
-      Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
-    if (Extract)
-      Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
-  }
-
-  return Cost;
-}
-
 int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) {
   // Handle non-power-of-two vectors such as <3 x float>
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3609,37 +3609,18 @@
   return V;
 }
 
-/// \brief Estimate the overhead of scalarizing a value based on its type.
-/// Insert and Extract are set if the result needs to be inserted and/or
-/// extracted from vectors.
-static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
-                                         const TargetTransformInfo &TTI) {
-  if (Ty->isVoidTy())
-    return 0;
-
-  assert(Ty->isVectorTy() && "Can only scalarize vectors");
-  unsigned Cost = 0;
-
-  for (unsigned I = 0, E = Ty->getVectorNumElements(); I < E; ++I) {
-    if (Extract)
-      Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, I);
-    if (Insert)
-      Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, I);
-  }
-
-  return Cost;
-}
-
 /// \brief Estimate the overhead of scalarizing an Instruction based on the
 /// types of its operands and return value.
 static unsigned getScalarizationOverhead(SmallVectorImpl<Type *> &OpTys,
                                          Type *RetTy,
                                          const TargetTransformInfo &TTI) {
-  unsigned ScalarizationCost =
-      getScalarizationOverhead(RetTy, true, false, TTI);
+  unsigned ScalarizationCost = 0;
+
+  if (!RetTy->isVoidTy())
+    ScalarizationCost += TTI.getScalarizationOverhead(RetTy, true, false);
 
   for (Type *Ty : OpTys)
-    ScalarizationCost += getScalarizationOverhead(Ty, false, true, TTI);
+    ScalarizationCost += TTI.getScalarizationOverhead(Ty, false, true);
 
   return ScalarizationCost;
 }
@@ -3651,14 +3632,13 @@
   if (VF == 1)
     return 0;
 
+  unsigned Cost = 0;
   Type *RetTy = ToVectorTy(I->getType(), VF);
+  if (!RetTy->isVoidTy())
+    Cost += TTI.getScalarizationOverhead(RetTy, true, false);
 
-  SmallVector<Type *, 4> OpTys;
-  unsigned OperandsNum = I->getNumOperands();
-  for (unsigned OpInd = 0; OpInd < OperandsNum; ++OpInd)
-    OpTys.push_back(ToVectorTy(I->getOperand(OpInd)->getType(), VF));
-
-  unsigned Cost = getScalarizationOverhead(OpTys, RetTy, TTI);
+  SmallVector<const Value *, 4> Operands(I->operand_values());
+  Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
 
   // if (supportsVectorElementAccess() &&
   if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
@@ -6838,8 +6818,8 @@
   // Compute the scalarization overhead of needed insertelement instructions
   // and phi nodes.
   if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
-    ScalarCost += getScalarizationOverhead(ToVectorTy(I->getType(), VF), true,
-                                           false, TTI);
+    ScalarCost += TTI.getScalarizationOverhead(ToVectorTy(I->getType(), VF),
+                                               true, false);
     ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI);
   }
 
@@ -6854,8 +6834,8 @@
         if (canBeScalarized(J))
           Worklist.push_back(J);
         else if (needsExtract(J))
-          ScalarCost += getScalarizationOverhead(ToVectorTy(J->getType(), VF),
-                                                 false, true, TTI);
+          ScalarCost += TTI.getScalarizationOverhead(
+              ToVectorTy(J->getType(), VF), false, true);
       }
 
   // Scale the total scalar cost by block probability.
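
For reference, below is a minimal sketch of how a cost-model client is expected to combine the two new hooks after this patch. The helper name estimateScalarizationCost is hypothetical and not part of the patch; the TTI calls match the signatures added above, and the logic mirrors the updated getScalarizationOverhead(Instruction *, VF, TTI) helper in LoopVectorize.cpp.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Hypothetical helper (illustration only): estimate the overhead of executing
// instruction I scalarly across all VF lanes, counting the inserts needed for
// its results and the extracts needed for its operands.
static unsigned estimateScalarizationCost(Instruction *I, unsigned VF,
                                          const TargetTransformInfo &TTI) {
  unsigned Cost = 0;

  // A non-void result must be inserted back into a vector
  // (Insert = true, Extract = false).
  if (!I->getType()->isVoidTy())
    Cost += TTI.getScalarizationOverhead(VectorType::get(I->getType(), VF),
                                         /*Insert=*/true, /*Extract=*/false);

  // Operands must be extracted from their vectors; the BasicTTIImpl default
  // of the new hook deduplicates repeated operands before charging extracts.
  SmallVector<const Value *, 4> Operands(I->operand_values());
  if (!Operands.empty())
    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);

  return Cost;
}

Targets only need to provide getVectorInstrCost for the BasicTTIImpl defaults of both hooks to produce per-element insert/extract costs; targets with cheaper element access can override the hooks directly.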