Index: clang/include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- clang/include/clang/Basic/BuiltinsAMDGPU.def +++ clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -155,14 +155,18 @@ BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc") BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc") +//===----------------------------------------------------------------------===// +// Flat addressing builtins. +//===----------------------------------------------------------------------===// +BUILTIN(__builtin_amdgcn_is_shared, "bvC*0", "nc") +BUILTIN(__builtin_amdgcn_is_private, "bvC*0", "nc") + //===----------------------------------------------------------------------===// // CI+ only builtins. //===----------------------------------------------------------------------===// TARGET_BUILTIN(__builtin_amdgcn_s_dcache_inv_vol, "v", "n", "ci-insts") TARGET_BUILTIN(__builtin_amdgcn_buffer_wbinvl1_vol, "v", "n", "ci-insts") TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_release_all, "vUi", "n", "ci-insts") -TARGET_BUILTIN(__builtin_amdgcn_is_shared, "bvC*0", "nc", "flat-address-space") -TARGET_BUILTIN(__builtin_amdgcn_is_private, "bvC*0", "nc", "flat-address-space") //===----------------------------------------------------------------------===// // Interpolation builtins. Index: clang/test/CodeGenOpenCL/builtins-amdgcn-flat-address-space.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/builtins-amdgcn-flat-address-space.cl @@ -0,0 +1,22 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu tahiti -S -emit-llvm -disable-llvm-passes -o - %s | FileCheck -enable-var-scope %s + +// SI did not actually support flat addressing, but we can codegen the address +// space test builtins. The target-specific part is a load from the implicit +// argument buffer to use for the high pointer bits. 
It's just that the buffer won't +// be initialized to something useful. The proper way to diagnose invalid flat +// usage is to forbid flat pointers on unsupported targets. + +// CHECK-LABEL: @test_is_shared_global( +// CHECK: [[CAST:%[0-9]+]] = addrspacecast ptr addrspace(1) %{{[0-9]+}} to ptr +// CHECK: call i1 @llvm.amdgcn.is.shared(ptr [[CAST]] +int test_is_shared_global(const global int* ptr) { + return __builtin_amdgcn_is_shared(ptr); +} + +// CHECK-LABEL: @test_is_private_global( +// CHECK: [[CAST:%[0-9]+]] = addrspacecast ptr addrspace(1) %{{[0-9]+}} to ptr +// CHECK: call i1 @llvm.amdgcn.is.private(ptr [[CAST]] +int test_is_private_global(const global int* ptr) { + return __builtin_amdgcn_is_private(ptr); +} Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl =================================================================== --- clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -1,5 +1,5 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -no-opaque-pointers -cl-std=CL2.0 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -no-opaque-pointers -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu tahiti -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s #pragma OPENCL EXTENSION cl_khr_fp64 : enable Index: clang/test/SemaOpenCL/builtins-amdgcn-error-flat-address-space.cl =================================================================== --- clang/test/SemaOpenCL/builtins-amdgcn-error-flat-address-space.cl +++ clang/test/SemaOpenCL/builtins-amdgcn-error-flat-address-space.cl @@ -1,8 +1,12 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s +// expected-no-diagnostics + +// Make sure no warning is produced due to the dead "flat-address-space" feature. 
+__attribute__((target("flat-address-space"))) void test_flat_address_space_builtins(int* ptr) { - (void)__builtin_amdgcn_is_shared(ptr); // expected-error {{'__builtin_amdgcn_is_shared' needs target feature flat-address-space}} - (void)__builtin_amdgcn_is_private(ptr); // expected-error {{'__builtin_amdgcn_is_private' needs target feature flat-address-space}} + (void)__builtin_amdgcn_is_shared(ptr); + (void)__builtin_amdgcn_is_private(ptr); } Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll @@ -1,8 +1,9 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}is_private_vgpr: -; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]] +; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]] ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11 ; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16) ; GFX9: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16 Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll @@ -1,8 +1,9 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI 
%s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}is_local_vgpr: -; GCN-DAG: {{flat|global}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]] +; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]] ; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10 ; GFX9-DAG: s_getreg_b32 [[APERTURE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16) ; GFX9: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16