Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -193,6 +193,13 @@ int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef Operands) const; + /// \brief Estimate the cost of a GEP operation when lowered. + /// + /// This user-based overload adds the ability to check if the GEP can be + /// folded into its users. + int getGEPCost(const GEPOperator *GEP, + ArrayRef Operands) const; + /// \brief Estimate the cost of a EXT operation when lowered. /// /// The contract for this function is the same as \c getOperationCost except @@ -251,9 +258,9 @@ /// \brief Estimate the cost of a given IR user when lowered. /// /// This can estimate the cost of either a ConstantExpr or Instruction when - /// lowered. It has two primary advantages over the \c getOperationCost and - /// \c getGEPCost above, and one significant disadvantage: it can only be - /// used when the IR construct has already been formed. + /// lowered. It has two primary advantages over the \c getOperationCost above, + /// and one significant disadvantage: it can only be used when the IR + /// construct has already been formed. /// /// The advantages are that it can inspect the SSA use graph to reason more /// accurately about the cost. 
For example, all-constant-GEPs can often be @@ -932,6 +939,8 @@ virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; virtual int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef Operands) = 0; + virtual int getGEPCost(const GEPOperator *GEP, + ArrayRef Operands) = 0; virtual int getExtCost(const Instruction *I, const Value *Src) = 0; virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0; virtual int getCallCost(const Function *F, int NumArgs) = 0; @@ -1113,6 +1122,10 @@ ArrayRef Operands) override { return Impl.getGEPCost(PointeeType, Ptr, Operands); } + int getGEPCost(const GEPOperator *GEP, + ArrayRef Operands) override { + return Impl.getGEPCost(GEP, Operands); + } int getExtCost(const Instruction *I, const Value *Src) override { return Impl.getExtCost(I, Src); } Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -722,6 +722,35 @@ return TTI::TCC_Basic; } + int getGEPCost(const GEPOperator *GEP, ArrayRef Operands) { + Type *PointeeType = GEP->getSourceElementType(); + const Value *Ptr = GEP->getPointerOperand(); + + if (getGEPCost(PointeeType, Ptr, Operands) == TTI::TCC_Free) { + // Should check if the GEP is actually used in load / store instructions. + // For simplicity, we check only direct users of the GEP. + // + // FIXME: GEPs could also be folded away as a part of addressing mode in + // load/store instructions together with other instructions (e.g., sext or + // other GEPs). Handling all such cases would be too expensive to perform + // in this function, so we stay conservative for now. 
+ for (const User *U : GEP->users()) { + const Operator *UOP = cast(U); + const Value *PointerOperand = nullptr; + if (auto *LI = dyn_cast(UOP)) + PointerOperand = LI->getPointerOperand(); + else if (auto *SI = dyn_cast(UOP)) + PointerOperand = SI->getPointerOperand(); + + if ((!PointerOperand || PointerOperand != GEP) && + !GEP->hasAllZeroIndices()) + return TTI::TCC_Basic; + } + return TTI::TCC_Free; + } + return TTI::TCC_Basic; + } + using BaseT::getIntrinsicCost; unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, @@ -740,11 +769,9 @@ if (isa(U)) return TTI::TCC_Free; // Model all PHI nodes as free. - if (const GEPOperator *GEP = dyn_cast(U)) { - return static_cast(this)->getGEPCost(GEP->getSourceElementType(), - GEP->getPointerOperand(), + if (const GEPOperator *GEP = dyn_cast(U)) + return static_cast(this)->getGEPCost(GEP, Operands.drop_front()); - } if (auto CS = ImmutableCallSite(U)) { const Function *F = CS.getCalledFunction(); Index: include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- include/llvm/CodeGen/BasicTTIImpl.h +++ include/llvm/CodeGen/BasicTTIImpl.h @@ -151,6 +151,11 @@ return BaseT::getGEPCost(PointeeType, Ptr, Operands); } + int getGEPCost(const GEPOperator *GEP, + ArrayRef Operands) { + return BaseT::getGEPCost(GEP, Operands); + } + int getExtCost(const Instruction *I, const Value *Src) { if (getTLI()->isExtFree(I)) return TargetTransformInfo::TCC_Free; Index: include/llvm/IR/Operator.h =================================================================== --- include/llvm/IR/Operator.h +++ include/llvm/IR/Operator.h @@ -456,6 +456,8 @@ if (ConstantInt *C = dyn_cast(I)) if (C->isZero()) continue; + if (isa(I)) + continue; return false; } return true; Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -88,6 +88,11 @@ return 
TTIImpl->getGEPCost(PointeeType, Ptr, Operands); } +int TargetTransformInfo::getGEPCost(const GEPOperator *GEP, + ArrayRef Operands) const { + return TTIImpl->getGEPCost(GEP, Operands); +} + int TargetTransformInfo::getExtCost(const Instruction *I, const Value *Src) const { return TTIImpl->getExtCost(I, Src); Index: lib/Transforms/Scalar/NaryReassociate.cpp =================================================================== --- lib/Transforms/Scalar/NaryReassociate.cpp +++ lib/Transforms/Scalar/NaryReassociate.cpp @@ -264,7 +264,7 @@ SmallVector Indices; for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) Indices.push_back(*I); - return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), + return TTI->getGEPCost(cast(GEP), Indices) == TargetTransformInfo::TCC_Free; } Index: lib/Transforms/Scalar/StraightLineStrengthReduce.cpp =================================================================== --- lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -239,7 +239,7 @@ SmallVector Indices; for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) Indices.push_back(*I); - return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), + return TTI->getGEPCost(cast(GEP), Indices) == TargetTransformInfo::TCC_Free; } Index: test/Analysis/CostModel/AArch64/gep.ll =================================================================== --- test/Analysis/CostModel/AArch64/gep.ll +++ test/Analysis/CostModel/AArch64/gep.ll @@ -290,3 +290,49 @@ %v = load i64, i64* %a ret i64 %v } + +; CHECK-LABEL: test37 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8 @test37(i64 %j, i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 %j + %l1 = call i8* @func(i8** %arrayidx0) + ret i8 0 +} + +; CHECK-LABEL: test38 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8 @test38(i8** readonly %P) { 
+entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 10 + %l1 = call i8* @func(i8** %arrayidx0) + ret i8 0 +} + +; CHECK-LABEL:test39 +; CHECK: cost of 0 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8 @test39(i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 0 + %l1 = call i8* @func(i8** %arrayidx0) + ret i8 0 +} + +; CHECK-LABEL:test40 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8*, i8** +define i8** @test40(i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8*, i8** %P, i64 10 + ret i8** %arrayidx0 +} + +; CHECK-LABEL:test41 +; CHECK: cost of 1 for instruction: {{.*}} = getelementptr inbounds i8, i8* +define i8 @test41(i8* %V, i8** readonly %P) { +entry: + %arrayidx0 = getelementptr inbounds i8, i8* %V, i64 10 + store i8* %arrayidx0, i8** %P + ret i8 0 +} + +declare i8* @func(i8**) Index: test/Analysis/CostModel/X86/vector_gep.ll =================================================================== --- test/Analysis/CostModel/X86/vector_gep.ll +++ test/Analysis/CostModel/X86/vector_gep.ll @@ -10,7 +10,7 @@ %vector = shufflevector <4 x i64> %temp, <4 x i64> undef, <4 x i32> zeroinitializer ;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds %struct.S %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32] +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds [1000 x i32] %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> , <4 x i32> undef) ret <4 x i32> %res