Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -483,7 +483,9 @@ int64_t Scale = 0; auto GTI = gep_type_begin(PointeeType, Operands); + Type *TargetType; for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { + TargetType = GTI.getIndexedType(); // We assume that the cost of Scalar GEP with constant index and the // cost of Vector GEP with splat constant index are the same. const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); @@ -513,11 +515,9 @@ unsigned AS = (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace()); if (static_cast<T *>(this)->isLegalAddressingMode( - PointerType::get(Type::getInt8Ty(PointeeType->getContext()), AS), - const_cast<GlobalValue *>(BaseGV), - BaseOffset, HasBaseReg, Scale, AS)) { + TargetType, const_cast<GlobalValue *>(BaseGV), BaseOffset, + HasBaseReg, Scale, AS)) return TTI::TCC_Free; - } return TTI::TCC_Basic; } Index: llvm/trunk/test/Analysis/CostModel/AArch64/gep.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/AArch64/gep.ll +++ llvm/trunk/test/Analysis/CostModel/AArch64/gep.ll @@ -0,0 +1,196 @@ +; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mcpu=kryo < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +define i8 @test1(i8* %p, i32 %i) { +; CHECK-LABEL: test1 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8* + %a = getelementptr inbounds i8, i8* %p, i32 0 + %v = load i8, i8* %a + ret i8 %v +} + +define i16 @test2(i16* %p, i32 %i) { +; CHECK-LABEL: test2 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16* + %a = getelementptr inbounds i16, i16* %p, i32 0 + %v = load i16, i16* %a + ret i16 %v +} + +define i32 @test3(i32* %p, i32 %i) { +; 
CHECK-LABEL: test3 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32* + %a = getelementptr inbounds i32, i32* %p, i32 0 + %v = load i32, i32* %a + ret i32 %v +} + +define i64 @test4(i64* %p, i32 %i) { +; CHECK-LABEL: test4 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64* + %a = getelementptr inbounds i64, i64* %p, i32 0 + %v = load i64, i64* %a + ret i64 %v +} + +define i8 @test5(i8* %p, i32 %i) { +; CHECK-LABEL: test5 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8* + %a = getelementptr inbounds i8, i8* %p, i32 1024 + %v = load i8, i8* %a + ret i8 %v +} + +define i16 @test6(i16* %p, i32 %i) { +; CHECK-LABEL: test6 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16* + %a = getelementptr inbounds i16, i16* %p, i32 1024 + %v = load i16, i16* %a + ret i16 %v +} + +define i32 @test7(i32* %p, i32 %i) { +; CHECK-LABEL: test7 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32* + %a = getelementptr inbounds i32, i32* %p, i32 1024 + %v = load i32, i32* %a + ret i32 %v +} + +define i64 @test8(i64* %p, i32 %i) { +; CHECK-LABEL: test8 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64* + %a = getelementptr inbounds i64, i64* %p, i32 1024 + %v = load i64, i64* %a + ret i64 %v +} + +define i8 @test9(i8* %p, i32 %i) { +; CHECK-LABEL: test9 +; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8* + %a = getelementptr inbounds i8, i8* %p, i32 4096 + %v = load i8, i8* %a + ret i8 %v +} + +define i16 @test10(i16* %p, i32 %i) { +; CHECK-LABEL: test10 +; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16* + %a = getelementptr inbounds i16, i16* %p, i32 4096 + %v = load i16, i16* %a + ret i16 %v +} + +define i32 @test11(i32* %p, i32 %i) { +; CHECK-LABEL: test11 +; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32* + %a = getelementptr inbounds i32, i32* %p, i32 4096 + %v = 
load i32, i32* %a + ret i32 %v +} + +define i64 @test12(i64* %p, i32 %i) { +; CHECK-LABEL: test12 +; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64* + %a = getelementptr inbounds i64, i64* %p, i32 4096 + %v = load i64, i64* %a + ret i64 %v +} + +define i8 @test13(i8* %p, i32 %i) { +; CHECK-LABEL: test13 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8* + %a = getelementptr inbounds i8, i8* %p, i32 -64 + %v = load i8, i8* %a + ret i8 %v +} + +define i16 @test14(i16* %p, i32 %i) { +; CHECK-LABEL: test14 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16* + %a = getelementptr inbounds i16, i16* %p, i32 -64 + %v = load i16, i16* %a + ret i16 %v +} + +define i32 @test15(i32* %p, i32 %i) { +; CHECK-LABEL: test15 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32* + %a = getelementptr inbounds i32, i32* %p, i32 -64 + %v = load i32, i32* %a + ret i32 %v +} + +define i64 @test16(i64* %p, i32 %i) { +; CHECK-LABEL: test16 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64* + %a = getelementptr inbounds i64, i64* %p, i32 -64 + %v = load i64, i64* %a + ret i64 %v +} + +define i8 @test17(i8* %p, i32 %i) { +; CHECK-LABEL: test17 +; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8* + %a = getelementptr inbounds i8, i8* %p, i32 -1024 + %v = load i8, i8* %a + ret i8 %v +} + +define i16 @test18(i16* %p, i32 %i) { +; CHECK-LABEL: test18 +; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16* + %a = getelementptr inbounds i16, i16* %p, i32 -1024 + %v = load i16, i16* %a + ret i16 %v +} + +define i32 @test19(i32* %p, i32 %i) { +; CHECK-LABEL: test19 +; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32* + %a = getelementptr inbounds i32, i32* %p, i32 -1024 + %v = load i32, i32* %a + ret i32 %v +} + +define i64 @test20(i64* %p, i32 %i) { +; CHECK-LABEL: test20 +; CHECK: cost of 1 for instruction: 
{{.*}} getelementptr inbounds i64, i64* + %a = getelementptr inbounds i64, i64* %p, i32 -1024 + %v = load i64, i64* %a + ret i64 %v +} + +define i8 @test21(i8* %p, i32 %i) { +; CHECK-LABEL: test21 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8* + %a = getelementptr inbounds i8, i8* %p, i32 %i + %v = load i8, i8* %a + ret i8 %v +} + +define i16 @test22(i16* %p, i32 %i) { +; CHECK-LABEL: test22 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16* + %a = getelementptr inbounds i16, i16* %p, i32 %i + %v = load i16, i16* %a + ret i16 %v +} + +define i32 @test23(i32* %p, i32 %i) { +; CHECK-LABEL: test23 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32* + %a = getelementptr inbounds i32, i32* %p, i32 %i + %v = load i32, i32* %a + ret i32 %v +} + +define i64 @test24(i64* %p, i32 %i) { +; CHECK-LABEL: test24 +; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64* + %a = getelementptr inbounds i64, i64* %p, i32 %i + %v = load i64, i64* %a + ret i64 %v +} Index: llvm/trunk/test/Analysis/CostModel/ARM/gep.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/ARM/gep.ll +++ llvm/trunk/test/Analysis/CostModel/ARM/gep.ll @@ -44,17 +44,17 @@ %b4 = getelementptr inbounds float, float* undef, i32 1024 ;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double* %b5 = getelementptr inbounds double, double* undef, i32 1024 -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>* +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>* %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1 -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>* +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>* %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1 -;CHECK: cost of 0 for 
instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>* +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>* %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1 -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>* +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>* %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1 -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>* +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>* %b11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1 -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>* +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>* %b12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1 ;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8* @@ -63,15 +63,15 @@ %c1 = getelementptr inbounds i16, i16* undef, i32 %i ;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32* %c2 = getelementptr inbounds i32, i32* undef, i32 %i -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64* +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64* %c3 = getelementptr inbounds i64, i64* undef, i32 %i -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float* +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float* %c4 = getelementptr inbounds float, float* undef, i32 %i -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double* +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double* %c5 = getelementptr inbounds double, double* undef, i32 %i -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>* +;CHECK: cost of 1 for 
instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>* %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i -;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>* +;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>* %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i ; Thumb-2 cannot fold scales larger than 8 to address computation. ;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*