diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -284,9 +284,19 @@
   };
 
   /// Estimate the cost of a GEP operation when lowered.
+  ///
+  /// \p PointeeType is the source element type of the GEP.
+  /// \p Ptr is the base pointer operand.
+  /// \p Operands is the list of indices following the base pointer.
+  ///
+  /// \p AccessType is a hint as to what type of memory might be accessed by
+  /// users of the GEP. getGEPCost will use it to determine if the GEP can be
+  /// folded into the addressing mode of a load/store. If AccessType is null,
+  /// then the resulting target type based off of PointeeType will be used as an
+  /// approximation.
   InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
-                             ArrayRef<const Value *> Operands,
+                             ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
                              TargetCostKind CostKind = TCK_SizeAndLatency) const;
 
   /// Describe known properties for a set of pointers.
@@ -1680,6 +1690,7 @@
   virtual const DataLayout &getDataLayout() const = 0;
   virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                                      ArrayRef<const Value *> Operands,
+                                     Type *AccessType,
                                      TTI::TargetCostKind CostKind) = 0;
   virtual InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                                const Value *Base,
@@ -2041,9 +2052,9 @@
   InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
-                             ArrayRef<const Value *> Operands,
+                             ArrayRef<const Value *> Operands, Type *AccessType,
                              TargetTransformInfo::TargetCostKind CostKind) override {
-    return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
+    return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
   }
   InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                        const Value *Base,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -47,7 +47,7 @@
   const DataLayout &getDataLayout() const { return DL; }
 
   InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
-                             ArrayRef<const Value *> Operands,
+                             ArrayRef<const Value *> Operands, Type *AccessType,
                              TTI::TargetCostKind CostKind) const {
     // In the basic model, we just assume that all-constant GEPs will be folded
     // into their uses via addressing modes.
@@ -987,7 +987,7 @@
   using BaseT::getGEPCost;
 
   InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
-                             ArrayRef<const Value *> Operands,
+                             ArrayRef<const Value *> Operands, Type *AccessType,
                              TTI::TargetCostKind CostKind) {
     assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
     assert(cast<PointerType>(Ptr->getType()->getScalarType())
@@ -1041,11 +1041,29 @@
       }
     }
 
+    // If we haven't been provided a hint, use the target type for now.
+    //
+    // TODO: Take a look at potentially removing this: This is *slightly* wrong
+    // as it's possible to have a GEP with a foldable target type but a memory
+    // access that isn't foldable. For example, this load isn't foldable on
+    // RISC-V:
+    //
+    // %p = getelementptr i32, ptr %base, i32 42
+    // %x = load <2 x i32>, ptr %p
+    if (!AccessType)
+      AccessType = TargetType;
+
+    // If the final address of the GEP is a legal addressing mode for the given
+    // access type, then we can fold it into its users.
     if (static_cast<T *>(this)->isLegalAddressingMode(
-            TargetType, const_cast<GlobalValue *>(BaseGV),
+            AccessType, const_cast<GlobalValue *>(BaseGV),
             BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
             Ptr->getType()->getPointerAddressSpace()))
       return TTI::TCC_Free;
+
+    // TODO: Instead of returning TCC_Basic here, we should use
+    // getArithmeticInstrCost. Or better yet, provide a hook to let the target
+    // model it.
     return TTI::TCC_Basic;
   }
@@ -1080,7 +1098,7 @@
         SmallVector<const Value *> Indices(GEP->indices());
         Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                    GEP->getPointerOperand(),
-                                                   Indices, CostKind);
+                                                   Indices, nullptr, CostKind);
       }
     }
     return Cost;
   }
@@ -1132,9 +1150,15 @@
       break;
     case Instruction::GetElementPtr: {
       const auto *GEP = cast<GEPOperator>(U);
+      Type *AccessType = nullptr;
+      // For now, only provide the AccessType in the simple case where the GEP
+      // only has one user.
+      if (GEP->hasOneUser() && I)
+        AccessType = I->user_back()->getAccessType();
+
       return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                    Operands.front(), Operands.drop_front(),
-                                   CostKind);
+                                   AccessType, CostKind);
     }
     case Instruction::Add:
     case Instruction::FAdd:
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -420,9 +420,9 @@
   }
 
   InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
-                             ArrayRef<const Value *> Operands,
+                             ArrayRef<const Value *> Operands, Type *AccessType,
                              TTI::TargetCostKind CostKind) {
-    return BaseT::getGEPCost(PointeeType, Ptr, Operands, CostKind);
+    return BaseT::getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
   }
 
   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -226,11 +226,10 @@
   return TTIImpl->getInlinerVectorBonusPercent();
 }
 
-InstructionCost
-TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
-                                ArrayRef<const Value *> Operands,
-                                TTI::TargetCostKind CostKind) const {
-  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
+InstructionCost TargetTransformInfo::getGEPCost(
+    Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands,
+    Type *AccessType, TTI::TargetCostKind CostKind) const {
+  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
 }
 
 InstructionCost TargetTransformInfo::getPointersChainCost(
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1641,7 +1641,7 @@
     } else {
      SmallVector<const Value *> Indices(GEP->indices());
      Cost += getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
-                        Indices, CostKind);
+                        Indices, nullptr, CostKind);
    }
  }
  return Cost;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4969,7 +4969,8 @@
     if (const auto *BaseGEP = dyn_cast<GetElementPtrInst>(Base)) {
       SmallVector<const Value *> Indices(BaseGEP->indices());
       return getGEPCost(BaseGEP->getSourceElementType(),
-                        BaseGEP->getPointerOperand(), Indices, CostKind);
+                        BaseGEP->getPointerOperand(), Indices, nullptr,
+                        CostKind);
     }
     return TTI::TCC_Free;
   }
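Note (not part of the patch): as a rough sketch of how a cost-model client might use the AccessType hint documented in TargetTransformInfo.h above, the snippet below costs a GEP while deriving the hint from its single user, and falls back to no hint otherwise. The helper name and the surrounding setup are assumptions made purely for illustration; the one-user logic mirrors the heuristic added to TargetTransformInfoImpl.h.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper (illustration only): cost a GEP, passing its single
// user's accessed memory type as the AccessType hint when one exists.
static InstructionCost
costGEPWithAccessHint(const TargetTransformInfo &TTI,
                      const GetElementPtrInst *GEP,
                      TargetTransformInfo::TargetCostKind CostKind) {
  Type *AccessType = nullptr;
  if (GEP->hasOneUser())
    if (const auto *UserInst = dyn_cast<Instruction>(*GEP->user_begin()))
      AccessType = UserInst->getAccessType();

  // Same call shape as the updated call sites in the patch; a null
  // AccessType keeps the old target-type approximation.
  SmallVector<const Value *> Indices(GEP->indices());
  return TTI.getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
                        Indices, AccessType, CostKind);
}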
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7454,7 +7454,8 @@
       if (const auto *Base = dyn_cast<GetElementPtrInst>(BasePtr)) {
         SmallVector<const Value *> Indices(Base->indices());
         VecCost = TTI->getGEPCost(Base->getSourceElementType(),
-                                  Base->getPointerOperand(), Indices, CostKind);
+                                  Base->getPointerOperand(), Indices, nullptr,
+                                  CostKind);
       }
     }
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
--- a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll
@@ -526,7 +526,7 @@
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbsb, i32 2, <16 x i1> %mask, <16 x i8> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 224 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbsb, i32 2, <16 x i1> %mask)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %indzext4 = zext <16 x i8> %ind8 to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep4 = getelementptr i8, ptr %base, <16 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep4 = getelementptr i8, ptr %base, <16 x i32> %indzext
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %indtrunc = trunc <16 x i32> %ind32 to <16 x i8>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %indtrunc, <16 x ptr> %gep4, i32 2, <16 x i1> %mask)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
diff --git a/llvm/test/Analysis/CostModel/RISCV/gep.ll b/llvm/test/Analysis/CostModel/RISCV/gep.ll
--- a/llvm/test/Analysis/CostModel/RISCV/gep.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/gep.ll
@@ -261,29 +261,29 @@
 ; be folded into the instruction.
 define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
 ; RVI-LABEL: 'non_foldable_vector_uses'
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = getelementptr i8, ptr %base, i32 42
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = getelementptr i8, ptr %base, i32 42
 ; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x1 = load volatile <2 x i8>, ptr %1, align 2
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %2 = getelementptr i8, ptr %base, i32 42
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = getelementptr i8, ptr %base, i32 42
 ; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %2, i32 1, <2 x i1> undef, <2 x i8> undef)
 ; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32>
 ; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x3 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %3, i32 1, <2 x i1> undef, <2 x i8> undef)
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %4 = getelementptr i8, ptr %base, i32 42
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = getelementptr i8, ptr %base, i32 42
 ; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = getelementptr i8, ptr %base, i32 42
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = getelementptr i8, ptr %base, i32 42
 ; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %6 = getelementptr i8, ptr %base, i32 42
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = getelementptr i8, ptr %base, i32 42
 ; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %7 = getelementptr i8, ptr %base, i32 42
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = getelementptr i8, ptr %base, i32 42
 ; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store volatile <2 x i8> undef, ptr %7, align 2
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %8 = getelementptr i8, ptr %base, i32 42
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = getelementptr i8, ptr %base, i32 42
 ; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> undef, ptr %8, i32 1, <2 x i1> undef)
 ; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32>
 ; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> %9, i32 1, <2 x i1> undef)
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %10 = getelementptr i8, ptr %base, i32 42
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = getelementptr i8, ptr %base, i32 42
 ; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %11 = getelementptr i8, ptr %base, i32 42
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = getelementptr i8, ptr %base, i32 42
 ; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %12 = getelementptr i8, ptr %base, i32 42
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = getelementptr i8, ptr %base, i32 42
 ; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
 ; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
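Note (not part of the patch): the RISC-V checks above illustrate the effect of the hint. The same constant-offset GEP stays free when the hinted access type can absorb the offset in its addressing mode, and is now costed as 1 when the target reports that a fixed-vector access cannot fold it. Below is a minimal sketch of querying both cases through the new parameter; the helper name, `TTI`, and `Base` are assumptions made for illustration, not code from the patch.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Hypothetical snippet (illustration only): cost the equivalent of
//   getelementptr i8, ptr %base, i32 42
// twice, once hinting a scalar i8 access and once a <2 x i8> access.
static void queryGEPCosts(const TargetTransformInfo &TTI, const Value *Base,
                          LLVMContext &Ctx) {
  Type *I8 = Type::getInt8Ty(Ctx);
  const Value *Idx = ConstantInt::get(Type::getInt32Ty(Ctx), 42);

  // Scalar access hint: on targets whose scalar loads/stores accept an
  // immediate offset, the GEP is expected to be folded (TCC_Free).
  InstructionCost ScalarUse = TTI.getGEPCost(I8, Base, {Idx}, /*AccessType=*/I8);

  // Vector access hint: where a vector load/store cannot fold the constant
  // offset (e.g. the RISC-V configuration exercised above), a non-zero cost
  // is expected instead.
  InstructionCost VectorUse =
      TTI.getGEPCost(I8, Base, {Idx},
                     /*AccessType=*/FixedVectorType::get(I8, 2));
  (void)ScalarUse;
  (void)VectorUse;
}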