# Review notes: free text ahead of the first "Index:" header, outside every hunk.
#
# include/llvm/CodeGen/BasicTTIImpl.h
#   The added check returns a cast cost of 0 when Opcode is
#   Instruction::AddrSpaceCast and TLI->isNoopAddrSpaceCast() accepts the
#   source and destination pointer address spaces. It is placed after the
#   isZExtFree early return and before the "legal (or promote)" check.
#
# test/Analysis/CostModel/AMDGPU/addrspacecast.ll
#   New FileCheck test driven by `opt -cost-model -analyze` with
#   -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri. It expects a cost of 0 for
#   addrspace(1) -> addrspace(4) casts (scalar, <2 x ...>, <32 x ...>) and
#   costs of 1, 2 and 32 for the same shapes of addrspace(3) -> addrspace(4)
#   casts.
#
# NOTE(review): indentation inside the hunks was reconstructed from a
# whitespace-collapsed copy of this patch; confirm it against the tree
# before applying.
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -343,6 +343,11 @@
         TLI->isZExtFree(SrcLT.second, DstLT.second))
       return 0;
 
+    if (Opcode == Instruction::AddrSpaceCast &&
+        TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
+                                 Dst->getPointerAddressSpace()))
+      return 0;
+
     // If the cast is marked as legal (or promote) then assume low cost.
     if (SrcLT.first == DstLT.first &&
         TLI->isOperationLegalOrPromote(ISD, DstLT.second))
Index: test/Analysis/CostModel/AMDGPU/addrspacecast.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/AMDGPU/addrspacecast.ll
@@ -0,0 +1,45 @@
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck %s
+
+; CHECK: 'addrspacecast_global_to_flat'
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(4)*
+define i8 addrspace(4)* @addrspacecast_global_to_flat(i8 addrspace(1)* %ptr) #0 {
+  %cast = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(4)*
+  ret i8 addrspace(4)* %cast
+}
+
+; CHECK: 'addrspacecast_global_to_flat_v2'
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8 addrspace(4)*>
+define <2 x i8 addrspace(4)*> @addrspacecast_global_to_flat_v2(<2 x i8 addrspace(1)*> %ptr) #0 {
+  %cast = addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8 addrspace(4)*>
+  ret <2 x i8 addrspace(4)*> %cast
+}
+
+; CHECK: 'addrspacecast_global_to_flat_v32'
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8 addrspace(4)*>
+define <32 x i8 addrspace(4)*> @addrspacecast_global_to_flat_v32(<32 x i8 addrspace(1)*> %ptr) #0 {
+  %cast = addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8 addrspace(4)*>
+  ret <32 x i8 addrspace(4)*> %cast
+}
+
+; CHECK: 'addrspacecast_local_to_flat'
+; CHECK: estimated cost of 1 for {{.*}} addrspacecast i8 addrspace(3)* %ptr to i8 addrspace(4)*
+define i8 addrspace(4)* @addrspacecast_local_to_flat(i8 addrspace(3)* %ptr) #0 {
+  %cast = addrspacecast i8 addrspace(3)* %ptr to i8 addrspace(4)*
+  ret i8 addrspace(4)* %cast
+}
+
+; CHECK: 'addrspacecast_local_to_flat_v2'
+; CHECK: estimated cost of 2 for {{.*}} addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8 addrspace(4)*>
+define <2 x i8 addrspace(4)*> @addrspacecast_local_to_flat_v2(<2 x i8 addrspace(3)*> %ptr) #0 {
+  %cast = addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8 addrspace(4)*>
+  ret <2 x i8 addrspace(4)*> %cast
+}
+
+; CHECK: 'addrspacecast_local_to_flat_v32'
+; CHECK: estimated cost of 32 for {{.*}} addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8 addrspace(4)*>
+define <32 x i8 addrspace(4)*> @addrspacecast_local_to_flat_v32(<32 x i8 addrspace(3)*> %ptr) #0 {
+  %cast = addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8 addrspace(4)*>
+  ret <32 x i8 addrspace(4)*> %cast
+}
+
+attributes #0 = { nounwind readnone }