Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -136,7 +136,7 @@
   /// The contract for this function is the same as \c getOperationCost except
   /// that it supports an interface that provides extra information specific to
   /// the GEP operation.
-  unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) const;
+  unsigned getGEPCost(const Value *Ptr, ArrayRef<Value *> Operands) const;

   /// \brief Estimate the cost of a function call when lowered.
   ///
@@ -520,8 +520,7 @@
   virtual ~Concept() = 0;
   virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
-  virtual unsigned getGEPCost(const Value *Ptr,
-                              ArrayRef<const Value *> Operands) = 0;
+  virtual unsigned getGEPCost(const Value *Ptr, ArrayRef<Value *> Operands) = 0;
   virtual unsigned getCallCost(FunctionType *FTy, int NumArgs) = 0;
   virtual unsigned getCallCost(const Function *F, int NumArgs) = 0;
   virtual unsigned getCallCost(const Function *F,
@@ -608,8 +607,7 @@
   unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
     return Impl.getOperationCost(Opcode, Ty, OpTy);
   }
-  unsigned getGEPCost(const Value *Ptr,
-                      ArrayRef<const Value *> Operands) override {
+  unsigned getGEPCost(const Value *Ptr, ArrayRef<Value *> Operands) override {
     return Impl.getGEPCost(Ptr, Operands);
   }
   unsigned getCallCost(FunctionType *FTy, int NumArgs) override {
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -107,7 +107,7 @@
     }
   }

-  unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) {
+  unsigned getGEPCost(const Value *Ptr, ArrayRef<Value *> Operands) {
     // In the basic model, we just assume that all-constant GEPs will be folded
     // into their uses via addressing modes.
     for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
@@ -386,6 +386,46 @@
     return static_cast<T *>(this)->getCallCost(F, Arguments.size());
   }

+  using BaseT::getGEPCost;
+
+  unsigned getGEPCost(const Value *Ptr, ArrayRef<Value *> Operands) {
+    const GlobalValue *BaseGV = dyn_cast<GlobalValue>(Ptr);
+    bool HasBaseReg = (BaseGV == nullptr);
+    int64_t BaseOffset = 0;
+    int64_t Scale = 0;
+
+    generic_gep_type_iterator<ArrayRef<Value *>::iterator> GTI =
+        gep_type_begin(Ptr->getType()->getScalarType(), Operands);
+    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
+      if (isa<SequentialType>(*GTI)) {
+        int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+        if (const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
+          BaseOffset += ConstIdx->getSExtValue() * ElementSize;
+        } else {
+          // Needs scale register.
+          if (Scale != 0) {
+            // No addressing mode takes two scale registers.
+            return TTI::TCC_Basic;
+          }
+          Scale = ElementSize;
+        }
+      } else {
+        StructType *STy = cast<StructType>(*GTI);
+        uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
+        BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
+      }
+    }
+
+    Type *GEPReturnTy =
+        GetElementPtrInst::getGEPReturnType(const_cast<Value *>(Ptr), Operands);
+    if (static_cast<T *>(this)->isLegalAddressingMode(
+            GEPReturnTy, const_cast<GlobalValue *>(BaseGV), BaseOffset,
+            HasBaseReg, Scale)) {
+      return TTI::TCC_Free;
+    }
+    return TTI::TCC_Basic;
+  }
+
   using BaseT::getIntrinsicCost;

   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
@@ -405,7 +445,7 @@
       return TTI::TCC_Free; // Model all PHI nodes as free.
     if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
-      SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
+      SmallVector<Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
       return static_cast<T *>(this)
           ->getGEPCost(GEP->getPointerOperand(), Indices);
     }
Index: lib/Analysis/CostModel.cpp
===================================================================
--- lib/Analysis/CostModel.cpp
+++ lib/Analysis/CostModel.cpp
@@ -384,8 +384,7 @@

   switch (I->getOpcode()) {
   case Instruction::GetElementPtr:{
-    Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
-    return TTI->getAddressComputationCost(ValTy);
+    return TTI->getUserCost(I);
   }

   case Instruction::Ret:
Index: lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
===================================================================
--- lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -231,6 +231,7 @@
           Basis.CandidateKind == C.CandidateKind);
 }

+// TODO: use TTI->getGEPCost.
 static bool isGEPFoldable(GetElementPtrInst *GEP,
                           const TargetTransformInfo *TTI,
                           const DataLayout *DL) {
Index: test/Analysis/CostModel/ARM/gep.ll
===================================================================
--- test/Analysis/CostModel/ARM/gep.ll
+++ test/Analysis/CostModel/ARM/gep.ll
@@ -3,41 +3,98 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios6.0.0"

-define void @test_geps() {
-  ; Cost of scalar integer geps should be one. We can't always expect it to be
-  ; folded into the instruction addressing mode.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
+define void @test_geps(i32 %i) {
+  ; Zero-offset scalar integer geps should be free; they fold into the
+  ; instruction addressing mode.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
   %a0 = getelementptr inbounds i8, i8* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
   %a1 = getelementptr inbounds i16, i16* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
   %a2 = getelementptr inbounds i32, i32* undef, i32 0
-
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
   %a3 = getelementptr inbounds i64, i64* undef, i32 0

-  ; Cost of scalar floating point geps should be one. We cannot fold the address
-  ; computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
+  ; Zero-offset scalar floating point geps are also free; the address
+  ; computation folds away.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
   %a4 = getelementptr inbounds float, float* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
   %a5 = getelementptr inbounds double, double* undef, i32 0
-
-  ; Cost of vector geps should be one. We cannot fold the address computation.
+  ; The same holds for zero-offset vector geps.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
   %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
   %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
   %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
   %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
   %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
   %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
+
+  ; Large constant offsets are free only while the scaled byte offset still
+  ; fits the immediate range of the addressing mode.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+  %b0 = getelementptr inbounds i8, i8* undef, i32 1024
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+  %b1 = getelementptr inbounds i16, i16* undef, i32 1024
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+  %b2 = getelementptr inbounds i32, i32* undef, i32 1024
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+  %b3 = getelementptr inbounds i64, i64* undef, i32 1024
+
+  ; Scalar floating point geps with a large offset cost one; the offset is out
+  ; of range, so we cannot fold the address computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
+  %b4 = getelementptr inbounds float, float* undef, i32 1024
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
+  %b5 = getelementptr inbounds double, double* undef, i32 1024
+
+  ; Vector geps with a small constant offset are free; the address computation
+  ; folds.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+  %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+  %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+  %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+  %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+  %b11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+  %b12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
+
+  ; Scalar integer geps with a register index are free; the target supports
+  ; reg+reg addressing for these accesses.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+  %c0 = getelementptr inbounds i8, i8* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+  %c1 = getelementptr inbounds i16, i16* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
+  %c2 = getelementptr inbounds i32, i32* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
+  %c3 = getelementptr inbounds i64, i64* undef, i32 %i
+
+  ; Scalar floating point geps with a register index are also free; the scaled
+  ; register folds into the address.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
+  %c4 = getelementptr inbounds float, float* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
+  %c5 = getelementptr inbounds double, double* undef, i32 %i
+
+  ; Vector geps with a register index fold only while the element size gives a
+  ; legal scale; the wider vectors need an explicit address computation.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+  %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+  %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+  %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+  %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+  %c11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+  %c12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
+
   ret void
 }
Index: test/Analysis/CostModel/no_info.ll
===================================================================
--- test/Analysis/CostModel/no_info.ll
+++ test/Analysis/CostModel/no_info.ll
@@ -5,9 +5,27 @@

 ; -- No triple in this module --

-;CHECK: cost of 1 {{.*}} add
-;CHECK: cost of 1 {{.*}} ret
+; CHECK-LABEL: function 'no_info'
+; CHECK: cost of 1 {{.*}} add
+; CHECK: cost of 1 {{.*}} ret
 define i32 @no_info(i32 %arg) {
   %e = add i32 %arg, %arg
   ret i32 %e
 }
+
+; CHECK-LABEL: function 'addressing_mode_reg_reg'
+define i8 @addressing_mode_reg_reg(i8* %a, i32 %b) {
+  ; NoTTI accepts reg+reg addressing, so this gep is free.
+  %p = getelementptr i8, i8* %a, i32 %b
+; CHECK: cost of 0 {{.*}} getelementptr
+  %v = load i8, i8* %p
+  ret i8 %v
+}
+
+; CHECK-LABEL: function 'addressing_mode_scaled_reg'
+define i32 @addressing_mode_scaled_reg(i32* %a, i32 %b) {
+  ; NoTTI rejects reg+scale*reg addressing, so this gep costs one.
+  %p = getelementptr i32, i32* %a, i32 %b
+; CHECK: cost of 1 {{.*}} getelementptr
+  %v = load i32, i32* %p
+  ret i32 %v
+}
Index: test/Transforms/LoopVectorize/X86/metadata-enable.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -60,7 +60,7 @@
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %add, i32* %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 32
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0

 for.end:                                          ; preds = %for.body
@@ -111,7 +111,7 @@
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %add, i32* %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 32
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
   br i1 %exitcond, label %for.end, label %for.body

 for.end:                                          ; preds = %for.body
@@ -162,7 +162,7 @@
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %add, i32* %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 32
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2

 for.end:                                          ; preds = %for.body
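
Note for reviewers skimming the patch: the new getGEPCost reduces a GEP to the canonical addressing form BaseGV + BaseReg + Scale * IndexReg + BaseOffset, then asks the target's isLegalAddressingMode hook whether a load/store can fold that form, returning TCC_Free if it can and TCC_Basic otherwise. The standalone sketch below mirrors that classification without the LLVM dependencies. It is illustrative only: GEPIndex, NoTTILegal, and the IsLegal callback are invented stand-ins for the TTI hook, and the NoTTI policy shown (reg+reg legal, reg+scale*reg not) is inferred from the no_info.ll expectations above rather than quoted from the patch.

#include <cstdint>
#include <iostream>
#include <vector>

// One GEP index, paired with the allocation size of the type it steps over.
// Struct-field offsets can be modeled as constant indices as well.
struct GEPIndex {
  bool IsConstant;     // Is the index a compile-time constant?
  int64_t Value;       // Constant index value (meaningful when IsConstant).
  int64_t ElementSize; // Allocation size of the indexed type, in bytes.
};

enum Cost { TCC_Free = 0, TCC_Basic = 1 };

// Fold the indices into "BaseOffset plus at most one Scale * IndexReg", then
// ask the legality predicate: the same shape as the patch's getGEPCost.
Cost getGEPCost(bool HasBaseGV, const std::vector<GEPIndex> &Indices,
                bool (*IsLegal)(bool HasBaseGV, int64_t BaseOffset,
                                bool HasBaseReg, int64_t Scale)) {
  bool HasBaseReg = !HasBaseGV;
  int64_t BaseOffset = 0;
  int64_t Scale = 0;

  for (const GEPIndex &Idx : Indices) {
    if (Idx.IsConstant) {
      BaseOffset += Idx.Value * Idx.ElementSize; // Folds into the offset.
    } else {
      if (Scale != 0)
        return TCC_Basic; // No addressing mode takes two scale registers.
      Scale = Idx.ElementSize; // One scale register is still foldable.
    }
  }
  return IsLegal(HasBaseGV, BaseOffset, HasBaseReg, Scale) ? TCC_Free
                                                           : TCC_Basic;
}

// Hypothetical stand-in for the NoTTI policy exercised by no_info.ll:
// reg+reg (Scale == 1) is accepted, reg+scale*reg (Scale > 1) is not.
bool NoTTILegal(bool HasBaseGV, int64_t BaseOffset, bool /*HasBaseReg*/,
                int64_t Scale) {
  return !HasBaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
}

int main() {
  // getelementptr i8, i8* %a, i32 %b   -> Scale 1 -> free  (prints 0).
  std::cout << getGEPCost(false, {{false, 0, 1}}, NoTTILegal) << "\n";
  // getelementptr i32, i32* %a, i32 %b -> Scale 4 -> basic (prints 1).
  std::cout << getGEPCost(false, {{false, 0, 4}}, NoTTILegal) << "\n";
}

In the real patch, ElementSize comes from DataLayout::getTypeAllocSize and the legality answer from TargetTransformInfo::isLegalAddressingMode, so each target decides for itself which (BaseGV, BaseOffset, Scale) combinations are free.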