Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -190,7 +190,7 @@ /// The contract for this function is the same as \c getOperationCost except /// that it supports an interface that provides extra information specific to /// the GEP operation. - int getGEPCost(Type *PointeeType, const Value *Ptr, + int getGEPCost(const GEPOperator *GEP, ArrayRef Operands) const; /// \brief Estimate the cost of a EXT operation when lowered. @@ -930,7 +930,7 @@ virtual ~Concept() = 0; virtual const DataLayout &getDataLayout() const = 0; virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; - virtual int getGEPCost(Type *PointeeType, const Value *Ptr, + virtual int getGEPCost(const GEPOperator *GEP, ArrayRef Operands) = 0; virtual int getExtCost(const Instruction *I, const Value *Src) = 0; virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0; @@ -1109,9 +1109,9 @@ int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override { return Impl.getOperationCost(Opcode, Ty, OpTy); } - int getGEPCost(Type *PointeeType, const Value *Ptr, + int getGEPCost(const GEPOperator *GEP, ArrayRef Operands) override { - return Impl.getGEPCost(PointeeType, Ptr, Operands); + return Impl.getGEPCost(GEP, Operands); } int getExtCost(const Instruction *I, const Value *Src) override { return Impl.getExtCost(I, Src); Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -103,8 +103,7 @@ } } - int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef Operands) { + int getGEPCost(const GEPOperator *GEP, ArrayRef Operands) { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. 
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -663,8 +662,10 @@ using BaseT::getGEPCost; - int getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef Operands) { + int getGEPCost(const GEPOperator *GEP, ArrayRef Operands) { + Type *PointeeType = GEP->getSourceElementType(); + const Value *Ptr = GEP->getPointerOperand(); + const GlobalValue *BaseGV = nullptr; if (Ptr != nullptr) { // TODO: will remove this when pointers have an opaque type. @@ -717,8 +718,28 @@ (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace()); if (static_cast(this)->isLegalAddressingMode( TargetType, const_cast(BaseGV), BaseOffset, - HasBaseReg, Scale, AS)) + HasBaseReg, Scale, AS)) { + // Should check if the GEP is actually used in load / store instructions. + // For simplicity, we check only direct users of the GEP. + // + // FIXME: GEPs could also be folded away as a part of addressing mode in + // load/store instructions together with other instructions (e.g., sext or + // other GEPs). Handling all such cases would be too expensive to perform + // in this function, so we stay conservative for now. + for (const User *U : GEP->users()) { + const Operator *UOP = cast(U); + const Value *PointerOperand = nullptr; + if (auto *LI = dyn_cast(UOP)) + PointerOperand = LI->getPointerOperand(); + else if (auto *SI = dyn_cast(UOP)) + PointerOperand = SI->getPointerOperand(); + + if ((!PointerOperand || PointerOperand != GEP) && + !GEP->hasAllZeroIndices()) + return TTI::TCC_Basic; + } return TTI::TCC_Free; + } return TTI::TCC_Basic; } @@ -740,11 +761,9 @@ if (isa(U)) return TTI::TCC_Free; // Model all PHI nodes as free. 
- if (const GEPOperator *GEP = dyn_cast(U)) { - return static_cast(this)->getGEPCost(GEP->getSourceElementType(), - GEP->getPointerOperand(), + if (const GEPOperator *GEP = dyn_cast(U)) + return static_cast(this)->getGEPCost(GEP, Operands.drop_front()); - } if (auto CS = ImmutableCallSite(U)) { const Function *F = CS.getCalledFunction(); Index: include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- include/llvm/CodeGen/BasicTTIImpl.h +++ include/llvm/CodeGen/BasicTTIImpl.h @@ -146,9 +146,9 @@ return getTLI()->isTypeLegal(VT); } - int getGEPCost(Type *PointeeType, const Value *Ptr, + int getGEPCost(const GEPOperator *GEP, ArrayRef Operands) { - return BaseT::getGEPCost(PointeeType, Ptr, Operands); + return BaseT::getGEPCost(GEP, Operands); } int getExtCost(const Instruction *I, const Value *Src) { Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -83,9 +83,9 @@ return TTIImpl->getInliningThresholdMultiplier(); } -int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, +int TargetTransformInfo::getGEPCost(const GEPOperator *GEP, ArrayRef Operands) const { - return TTIImpl->getGEPCost(PointeeType, Ptr, Operands); + return TTIImpl->getGEPCost(GEP, Operands); } int TargetTransformInfo::getExtCost(const Instruction *I, Index: lib/Transforms/Scalar/NaryReassociate.cpp =================================================================== --- lib/Transforms/Scalar/NaryReassociate.cpp +++ lib/Transforms/Scalar/NaryReassociate.cpp @@ -264,7 +264,7 @@ SmallVector Indices; for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) Indices.push_back(*I); - return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), + return TTI->getGEPCost(cast(GEP), Indices) == TargetTransformInfo::TCC_Free; } Index: 
lib/Transforms/Scalar/StraightLineStrengthReduce.cpp =================================================================== --- lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -239,7 +239,7 @@ SmallVector Indices; for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) Indices.push_back(*I); - return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), + return TTI->getGEPCost(cast(GEP), Indices) == TargetTransformInfo::TCC_Free; } Index: test/Analysis/CostModel/AArch64/gep.ll =================================================================== --- test/Analysis/CostModel/AArch64/gep.ll +++ test/Analysis/CostModel/AArch64/gep.ll @@ -290,3 +290,49 @@ %v = load i64, i64* %a ret i64 %v } + +; CHECK-LABEL: test37 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8 @test37(i64 %j, i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 %j + %l1 = call i8* @func(i8** %arrayidx0) + ret i8 0 +} + +; CHECK-LABEL: test38 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8 @test38(i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 10 + %l1 = call i8* @func(i8** %arrayidx0) + ret i8 0 +} + +; CHECK-LABEL:test39 +; CHECK: cost of 0 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8 @test39(i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 0 + %l1 = call i8* @func(i8** %arrayidx0) + ret i8 0 +} + +; CHECK-LABEL:test40 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8** @test40(i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 10 + ret i8** %arrayidx0 +} + +; CHECK-LABEL:test41 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8, i8* +define i8 @test41(i8* %V, i8** readonly %P) { +entry: + %arrayidx0 = getelementptr 
inbounds i8, i8* %V, i64 10 + store i8* %arrayidx0, i8** %P + ret i8 0 +} + +declare i8* @func(i8**) Index: test/Analysis/CostModel/X86/vector_gep.ll =================================================================== --- test/Analysis/CostModel/X86/vector_gep.ll +++ test/Analysis/CostModel/X86/vector_gep.ll @@ -8,9 +8,9 @@ define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){ %temp = insertelement <4 x i64> undef, i64 %base, i32 0 %vector = shufflevector <4 x i64> %temp, <4 x i64> undef, <4 x i32> zeroinitializer -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds %struct.S +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds %struct.S %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32] +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds [1000 x i32] %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> , <4 x i32> undef) ret <4 x i32> %res