diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
--- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip \
 // RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
 // RUN: -o - | FileCheck %s
@@ -9,72 +10,139 @@
 #include "Inputs/cuda.h"
 // CHECK-LABEL: @_Z16use_dispatch_ptrPi(
-// CHECK: %[[PTR:.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
-// CHECK: %{{.*}} = addrspacecast i8 addrspace(4)* %[[PTR]] to i32*
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT:%.*]] = alloca i32*, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT]] to i32**
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i32*, align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[OUT_ADDR]] to i32**
+// CHECK-NEXT: [[DISPATCH_PTR:%.*]] = alloca i32*, align 8, addrspace(5)
+// CHECK-NEXT: [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[DISPATCH_PTR]] to i32**
+// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i32 addrspace(1)* [[OUT_COERCE:%.*]] to i32*
+// CHECK-NEXT: store i32* [[TMP0]], i32** [[OUT_ASCAST]], align 8
+// CHECK-NEXT: [[OUT1:%.*]] = load i32*, i32** [[OUT_ASCAST]], align 8
+// CHECK-NEXT: store i32* [[OUT1]], i32** [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast i8 addrspace(4)* [[TMP1]] to i32*
+// CHECK-NEXT: store i32* [[TMP2]], i32** [[DISPATCH_PTR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DISPATCH_PTR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP5]], align 4
+// CHECK-NEXT: ret void
+//
 __global__ void use_dispatch_ptr(int* out) {
 const int* dispatch_ptr = (const int*)__builtin_amdgcn_dispatch_ptr();
 *out = *dispatch_ptr;
 }
-// CHECK-LABEL: @_Z12test_ds_fmaxf(
-// CHECK: call contract float @llvm.amdgcn.ds.fmax.f32(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float %{{[^,]*}}, i32 0, i32 0, i1 false)
 __global__
+// CHECK-LABEL: @_Z12test_ds_fmaxf(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float*
+// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float*
+// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fmax.f32(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false)
+// CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4
+// CHECK-NEXT: ret void
+//
 void test_ds_fmax(float src) {
 __shared__ float shared;
 volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false);
 }
 // CHECK-LABEL: @_Z12test_ds_faddf(
-// CHECK: call contract float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* @_ZZ12test_ds_faddfE6shared, float %{{[^,]*}},
i32 0, i32 0, i1 false) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* +// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* +// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* @_ZZ12test_ds_faddfE6shared, float [[TMP0]], i32 0, i32 0, i1 false) +// CHECK-NEXT: store volatile float [[TMP1]], float* [[X_ASCAST]], align 4 +// CHECK-NEXT: ret void +// __global__ void test_ds_fadd(float src) { __shared__ float shared; volatile float x = __builtin_amdgcn_ds_faddf(&shared, src, 0, 0, false); } -// CHECK-LABEL: @_Z12test_ds_fminfPf(float %src, float addrspace(1)* %shared.coerce -// CHECK: %shared = alloca float*, align 8, addrspace(5) -// CHECK: %shared.ascast = addrspacecast float* addrspace(5)* %shared to float** -// CHECK: %shared.addr = alloca float*, align 8, addrspace(5) -// CHECK: %shared.addr.ascast = addrspacecast float* addrspace(5)* %shared.addr to float** -// CHECK: %[[S0:.*]] = addrspacecast float addrspace(1)* %shared.coerce to float* -// CHECK: store float* %[[S0]], float** %shared.ascast, align 8 -// CHECK: %shared1 = load float*, float** %shared.ascast, align 8 -// CHECK: store float* %shared1, float** %shared.addr.ascast, align 8 -// CHECK: %[[S1:.*]] = load float*, float** %shared.addr.ascast, align 8 -// CHECK: %[[S2:.*]] = addrspacecast float* %[[S1]] to float addrspace(3)* -// CHECK: call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* %[[S2]] +// CHECK-LABEL: @_Z12test_ds_fminfPf( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float** +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* +// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float** +// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[X]] to float* +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float* +// CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: [[SHARED1:%.*]] = load float*, float** [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float* [[SHARED1]], float** [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float* [[TMP1]] to float addrspace(3)* +// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false) +// CHECK-NEXT: store volatile float [[TMP4]], float* [[X_ASCAST]], align 4 +// CHECK-NEXT: ret void +// __global__ void test_ds_fmin(float src, float *shared) { volatile float x = 
__builtin_amdgcn_ds_fminf(shared, src, 0, 0, false); } -// CHECK: @_Z33test_ret_builtin_nondef_addrspace -// CHECK: %[[X:.*]] = alloca i8*, align 8, addrspace(5) -// CHECK: %[[XC:.*]] = addrspacecast i8* addrspace(5)* %[[X]] to i8** -// CHECK: %[[Y:.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -// CHECK: %[[YASCAST:.*]] = addrspacecast i8 addrspace(4)* %[[Y]] to i8* -// CHECK: store i8* %[[YASCAST]], i8** %[[XC]], align 8 +// CHECK-LABEL: @_Z33test_ret_builtin_nondef_addrspacev( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca i8*, align 8, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast i8* addrspace(5)* [[X]] to i8** +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i8 addrspace(4)* [[TMP0]] to i8* +// CHECK-NEXT: store i8* [[TMP1]], i8** [[X_ASCAST]], align 8 +// CHECK-NEXT: ret void +// __device__ void test_ret_builtin_nondef_addrspace() { void *x = __builtin_amdgcn_dispatch_ptr(); } // CHECK-LABEL: @_Z6endpgmv( -// CHECK: call void @llvm.amdgcn.endpgm() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.amdgcn.endpgm() +// CHECK-NEXT: ret void +// __global__ void endpgm() { __builtin_amdgcn_endpgm(); } // Check the 64 bit argument is correctly passed to the intrinsic without truncation or assertion. -// CHECK-LABEL: @_Z14test_uicmp_i64 -// CHECK: store i64* %out1, i64** %out.addr.ascast -// CHECK-NEXT: store i64 %a, i64* %a.addr.ascast -// CHECK-NEXT: store i64 %b, i64* %b.addr.ascast -// CHECK-NEXT: %[[V0:.*]] = load i64, i64* %a.addr.ascast -// CHECK-NEXT: %[[V1:.*]] = load i64, i64* %b.addr.ascast -// CHECK-NEXT: %[[V2:.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 %[[V0]], i64 %[[V1]], i32 35) -// CHECK-NEXT: %[[V3:.*]] = load i64*, i64** %out.addr.ascast -// CHECK-NEXT: store i64 %[[V2]], i64* %[[V3]] -// CHECK-NEXT: ret void +// CHECK-LABEL: @_Z14test_uicmp_i64Pyyy( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64** +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64** +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[A_ADDR]] to i64* +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[B_ADDR]] to i64* +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64* +// CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8 +// CHECK-NEXT: [[OUT1:%.*]] = load i64*, i64** [[OUT_ASCAST]], align 8 +// CHECK-NEXT: store i64* [[OUT1]], i64** [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 35) +// CHECK-NEXT: [[TMP4:%.*]] = load i64*, i64** [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP4]], align 8 +// CHECK-NEXT: ret void +// __global__ void test_uicmp_i64(unsigned long long *out, unsigned 
long long a, unsigned long long b) { *out = __builtin_amdgcn_uicmpl(a, b, 30+5); @@ -82,11 +150,21 @@ // Check the 64 bit return value is correctly returned without truncation or assertion. -// CHECK-LABEL: @_Z14test_s_memtime -// CHECK: %[[V1:.*]] = call i64 @llvm.amdgcn.s.memtime() -// CHECK-NEXT: %[[PTR:.*]] = load i64*, i64** %out.addr.ascast -// CHECK-NEXT: store i64 %[[V1]], i64* %[[PTR]] -// CHECK-NEXT: ret void +// CHECK-LABEL: @_Z14test_s_memtimePy( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT]] to i64** +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[OUT_ADDR]] to i64** +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64 addrspace(1)* [[OUT_COERCE:%.*]] to i64* +// CHECK-NEXT: store i64* [[TMP0]], i64** [[OUT_ASCAST]], align 8 +// CHECK-NEXT: [[OUT1:%.*]] = load i64*, i64** [[OUT_ASCAST]], align 8 +// CHECK-NEXT: store i64* [[OUT1]], i64** [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.s.memtime() +// CHECK-NEXT: [[TMP2:%.*]] = load i64*, i64** [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 8 +// CHECK-NEXT: ret void +// __global__ void test_s_memtime(unsigned long long* out) { *out = __builtin_amdgcn_s_memtime(); @@ -95,41 +173,55 @@ // Check a generic pointer can be passed as a shared pointer and a generic pointer. __device__ void func(float *x); -// CHECK: @_Z17test_ds_fmin_funcfPf -// CHECK: %[[SHARED:.*]] = alloca float*, align 8, addrspace(5) -// CHECK: %[[SHARED_ASCAST:.*]] = addrspacecast float* addrspace(5)* %[[SHARED]] to float** -// CHECK: %[[SRC_ADDR:.*]] = alloca float, align 4, addrspace(5) -// CHECK: %[[SRC_ADDR_ASCAST:.*]] = addrspacecast float addrspace(5)* %[[SRC_ADDR]] to float* -// CHECK: %[[SHARED_ADDR:.*]] = alloca float*, align 8, addrspace(5) -// CHECK: %[[SHARED_ADDR_ASCAST:.*]] = addrspacecast float* addrspace(5)* %[[SHARED_ADDR]] to float** -// CHECK: %[[X:.*]] = alloca float, align 4, addrspace(5) -// CHECK: %[[X_ASCAST:.*]] = addrspacecast float addrspace(5)* %[[X]] to float* -// CHECK: %[[SHARED1:.*]] = load float*, float** %[[SHARED_ASCAST]], align 8 -// CHECK: store float %src, float* %[[SRC_ADDR_ASCAST]], align 4 -// CHECK: store float* %[[SHARED1]], float** %[[SHARED_ADDR_ASCAST]], align 8 -// CHECK: %[[ARG0_PTR:.*]] = load float*, float** %[[SHARED_ADDR_ASCAST]], align 8 -// CHECK: %[[ARG0:.*]] = addrspacecast float* %[[ARG0_PTR]] to float addrspace(3)* -// CHECK: call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* %[[ARG0]] -// CHECK: %[[ARG0:.*]] = load float*, float** %[[SHARED_ADDR_ASCAST]], align 8 -// CHECK: call void @_Z4funcPf(float* %[[ARG0]]) #8 +// CHECK-LABEL: @_Z17test_ds_fmin_funcfPf( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHARED:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[SHARED_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED]] to float** +// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast float addrspace(5)* [[SRC_ADDR]] to float* +// CHECK-NEXT: [[SHARED_ADDR:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[SHARED_ADDR]] to float** +// CHECK-NEXT: [[X:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float 
addrspace(5)* [[X]] to float* +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[SHARED_COERCE:%.*]] to float* +// CHECK-NEXT: store float* [[TMP0]], float** [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: [[SHARED1:%.*]] = load float*, float** [[SHARED_ASCAST]], align 8 +// CHECK-NEXT: store float [[SRC:%.*]], float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float* [[SHARED1]], float** [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float* [[TMP1]] to float addrspace(3)* +// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[SRC_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(float addrspace(3)* [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false) +// CHECK-NEXT: store volatile float [[TMP4]], float* [[X_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float*, float** [[SHARED_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_Z4funcPf(float* [[TMP5]]) #[[ATTR8:[0-9]+]] +// CHECK-NEXT: ret void +// __global__ void test_ds_fmin_func(float src, float *__restrict shared) { volatile float x = __builtin_amdgcn_ds_fminf(shared, src, 0, 0, false); func(shared); } -// CHECK: @_Z14test_is_sharedPf(float addrspace(1)* %[[X_COERCE:.*]]) -// CHECK: %[[X:.*]] = alloca float*, align 8, addrspace(5) -// CHECK: %[[X_ASCAST:.*]] = addrspacecast float* addrspace(5)* %[[X]] to float** -// CHECK: %[[X_ADDR:.*]] = alloca float*, align 8, addrspace(5) -// CHECK: %[[X_ADDR_ASCAST:.*]] = addrspacecast float* addrspace(5)* %[[X_ADDR]] to float** -// CHECK: %[[X_FP:.*]] = addrspacecast float addrspace(1)* %[[X_COERCE]] to float* -// CHECK: store float* %[[X_FP]], float** %[[X_ASCAST]], align 8 -// CHECK: %[[X1:.*]] = load float*, float** %[[X_ASCAST]], align 8 -// CHECK: store float* %[[X1]], float** %[[X_ADDR_ASCAST]], align 8 -// CHECK: %[[X_TMP:.*]] = load float*, float** %[[X_ADDR_ASCAST]], align 8 -// CHECK: %[[X_ARG:.*]] = bitcast float* %[[X_TMP]] to i8* -// CHECK: call i1 @llvm.amdgcn.is.shared(i8* %[[X_ARG]]) +// CHECK-LABEL: @_Z14test_is_sharedPf( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X]] to float** +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca float*, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast float* addrspace(5)* [[X_ADDR]] to float** +// CHECK-NEXT: [[RET:%.*]] = alloca i8, align 1, addrspace(5) +// CHECK-NEXT: [[RET_ASCAST:%.*]] = addrspacecast i8 addrspace(5)* [[RET]] to i8* +// CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float addrspace(1)* [[X_COERCE:%.*]] to float* +// CHECK-NEXT: store float* [[TMP0]], float** [[X_ASCAST]], align 8 +// CHECK-NEXT: [[X1:%.*]] = load float*, float** [[X_ASCAST]], align 8 +// CHECK-NEXT: store float* [[X1]], float** [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load float*, float** [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP2]]) +// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP3]] to i8 +// CHECK-NEXT: store i8 [[FROMBOOL]], i8* [[RET_ASCAST]], align 1 +// CHECK-NEXT: ret void +// __global__ void test_is_shared(float *x){ bool ret = __builtin_amdgcn_is_shared(x); } diff --git a/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp b/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp --- 
a/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp +++ b/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp @@ -1,53 +1,60 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -O0 -triple amdgcn---amdgiz -emit-llvm %s -o - | FileCheck %s -// CHECK-LABEL: define{{.*}} void @_Z5func1Pi(i32* %x) +// CHECK-LABEL: @_Z5func1Pi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[X_ADDR]] to i32** +// CHECK-NEXT: store i32* [[X:%.*]], i32** [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 1, i32* [[TMP0]], align 4 +// CHECK-NEXT: ret void +// void func1(int *x) { - // CHECK: %[[x_addr:.*]] = alloca i32*{{.*}}addrspace(5) - // CHECK: %[[r0:.*]] = addrspacecast i32* addrspace(5)* %[[x_addr]] to i32** - // CHECK: store i32* %x, i32** %[[r0]] - // CHECK: %[[r1:.*]] = load i32*, i32** %[[r0]] - // CHECK: store i32 1, i32* %[[r1]] *x = 1; } -// CHECK-LABEL: define{{.*}} void @_Z5func2v() +// CHECK-LABEL: @_Z5func2v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV1]] to i32* +// CHECK-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LV2]] to i32* +// CHECK-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5) +// CHECK-NEXT: [[LA_ASCAST:%.*]] = addrspacecast [100 x i32] addrspace(5)* [[LA]] to [100 x i32]* +// CHECK-NEXT: [[LP1:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP1]] to i32** +// CHECK-NEXT: [[LP2:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[LP2]] to i32** +// CHECK-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LVC]] to i32* +// CHECK-NEXT: store i32 1, i32* [[LV1_ASCAST]], align 4 +// CHECK-NEXT: store i32 2, i32* [[LV2_ASCAST]], align 4 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[LA_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: store i32 3, i32* [[ARRAYIDX]], align 4 +// CHECK-NEXT: store i32* [[LV1_ASCAST]], i32** [[LP1_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[LA_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: store i32* [[ARRAYDECAY]], i32** [[LP2_ASCAST]], align 8 +// CHECK-NEXT: call void @_Z5func1Pi(i32* [[LV1_ASCAST]]) +// CHECK-NEXT: store i32 4, i32* [[LVC_ASCAST]], align 4 +// CHECK-NEXT: store i32 4, i32* [[LV1_ASCAST]], align 4 +// CHECK-NEXT: ret void +// void func2(void) { - // CHECK: %lv1 = alloca i32, align 4, addrspace(5) - // CHECK: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %lv1 to i32* - // CHECK: %lv2 = alloca i32, align 4, addrspace(5) - // CHECK: %[[r1:.*]] = addrspacecast i32 addrspace(5)* %lv2 to i32* - // CHECK: %la = alloca [100 x i32], align 4, addrspace(5) - // CHECK: %[[r2:.*]] = addrspacecast [100 x i32] addrspace(5)* %la to [100 x i32]* - // CHECK: %lp1 = alloca i32*, align 8, addrspace(5) - // CHECK: %[[r3:.*]] = addrspacecast i32* addrspace(5)* %lp1 to i32** - // CHECK: %lp2 = alloca i32*, align 8, addrspace(5) - // CHECK: %[[r4:.*]] = addrspacecast i32* addrspace(5)* %lp2 to i32** - // CHECK: %lvc = alloca i32, align 
4, addrspace(5) - // CHECK: %[[r5:.*]] = addrspacecast i32 addrspace(5)* %lvc to i32* - - // CHECK: store i32 1, i32* %[[r0]] + int lv1; lv1 = 1; - // CHECK: store i32 2, i32* %[[r1]] int lv2 = 2; - // CHECK: %[[arrayidx:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[r2]], i64 0, i64 0 - // CHECK: store i32 3, i32* %[[arrayidx]], align 4 int la[100]; la[0] = 3; - // CHECK: store i32* %[[r0]], i32** %[[r3]], align 8 int *lp1 = &lv1; - // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[r2]], i64 0, i64 0 - // CHECK: store i32* %[[arraydecay]], i32** %[[r4]], align 8 int *lp2 = la; - // CHECK: call void @_Z5func1Pi(i32* %[[r0]]) func1(&lv1); - // CHECK: store i32 4, i32* %[[r5]] - // CHECK: store i32 4, i32* %[[r0]] const int lvc = 4; lv1 = lvc; } @@ -63,38 +70,62 @@ } }; -// CHECK-LABEL: define{{.*}} void @_Z5func3v +// CHECK-LABEL: @_Z5func3v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = alloca [[CLASS_A:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[A]] to %class.A* +// CHECK-NEXT: call void @_ZN1AC1Ev(%class.A* nonnull align 4 dereferenceable(4) [[A_ASCAST]]) +// CHECK-NEXT: call void @_ZN1AD1Ev(%class.A* nonnull align 4 dereferenceable(4) [[A_ASCAST]]) +// CHECK-NEXT: ret void +// void func3() { - // CHECK: %[[a:.*]] = alloca %class.A, align 4, addrspace(5) - // CHECK: %[[r0:.*]] = addrspacecast %class.A addrspace(5)* %[[a]] to %class.A* - // CHECK: call void @_ZN1AC1Ev(%class.A* {{[^,]*}} %[[r0]]) - // CHECK: call void @_ZN1AD1Ev(%class.A* {{[^,]*}} %[[r0]]) A a; } -// CHECK-LABEL: define{{.*}} void @_Z5func4i +// CHECK-LABEL: @_Z5func4i( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[X_ADDR]] to i32* +// CHECK-NEXT: store i32 [[X:%.*]], i32* [[X_ADDR_ASCAST]], align 4 +// CHECK-NEXT: call void @_Z5func1Pi(i32* [[X_ADDR_ASCAST]]) +// CHECK-NEXT: ret void +// void func4(int x) { - // CHECK: %[[x_addr:.*]] = alloca i32, align 4, addrspace(5) - // CHECK: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %[[x_addr]] to i32* - // CHECK: store i32 %x, i32* %[[r0]], align 4 - // CHECK: call void @_Z5func1Pi(i32* %[[r0]]) func1(&x); } -// CHECK-LABEL: define{{.*}} void @_Z5func5v +// CHECK-LABEL: @_Z5func5v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[X]] to i32* +// CHECK-NEXT: ret void +// void func5() { return; int x = 0; } -// CHECK-LABEL: define{{.*}} void @_Z5func6v +// CHECK-LABEL: @_Z5func6v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[X]] to i32* +// CHECK-NEXT: ret void +// void func6() { return; int x; } -// CHECK-LABEL: define{{.*}} void @_Z5func7v extern void use(int *); +// CHECK-LABEL: @_Z5func7v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[X]] to i32* +// CHECK-NEXT: br label [[LATER:%.*]] +// CHECK: later: +// CHECK-NEXT: call void @_Z3usePi(i32* [[X_ASCAST]]) +// CHECK-NEXT: ret void +// void func7() { goto later; int x; @@ -102,4 +133,3 @@ use(&x); } -// CHECK-NOT: !opencl.ocl.version diff --git a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp --- a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp +++ 
b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -O0 -triple amdgcn -emit-llvm %s -o - | FileCheck %s class A { @@ -17,77 +18,101 @@ void func_with_ref_arg(A &a); void func_with_ref_arg(B &b); -// CHECK-LABEL: define{{.*}} void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %a) -// CHECK: %p = alloca %class.A*, align 8, addrspace(5) -// CHECK: %[[r1:.+]] = addrspacecast %class.A* addrspace(5)* %p to %class.A** -// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A* -// CHECK: store %class.A* %[[r0]], %class.A** %[[r1]], align 8 +// CHECK-LABEL: @_Z22func_with_indirect_arg1A( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[P:%.*]] = alloca %class.A*, align 8, addrspace(5) +// CHECK-NEXT: [[P_ASCAST:%.*]] = addrspacecast %class.A* addrspace(5)* [[P]] to %class.A** +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast [[CLASS_A:%.*]] addrspace(5)* [[A:%.*]] to %class.A* +// CHECK-NEXT: store %class.A* [[A_ASCAST]], %class.A** [[P_ASCAST]], align 8 +// CHECK-NEXT: ret void +// void func_with_indirect_arg(A a) { A *p = &a; } -// CHECK-LABEL: define{{.*}} void @_Z22test_indirect_arg_autov() -// CHECK: %a = alloca %class.A, align 4, addrspace(5) -// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A* -// CHECK: %agg.tmp = alloca %class.A, align 4, addrspace(5) -// CHECK: %[[r1:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A* -// CHECK: call void @_ZN1AC1Ev(%class.A* {{[^,]*}} %[[r0]]) -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -// CHECK: %[[r4:.+]] = addrspacecast %class.A* %[[r1]] to %class.A addrspace(5)* -// CHECK: call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r4]]) -// CHECK: call void @_ZN1AD1Ev(%class.A* {{[^,]*}} %[[r1]]) -// CHECK: call void @_Z17func_with_ref_argR1A(%class.A* nonnull align 4 dereferenceable(4) %[[r0]]) -// CHECK: call void @_ZN1AD1Ev(%class.A* {{[^,]*}} %[[r0]]) +// CHECK-LABEL: @_Z22test_indirect_arg_autov( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = alloca [[CLASS_A:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[A]] to %class.A* +// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_A]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[AGG_TMP]] to %class.A* +// CHECK-NEXT: call void @_ZN1AC1Ev(%class.A* nonnull align 4 dereferenceable(4) [[A_ASCAST]]) +// CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.A* [[AGG_TMP_ASCAST]] to i8* +// CHECK-NEXT: [[TMP1:%.*]] = bitcast %class.A* [[A_ASCAST]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[TMP1]], i64 4, i1 false) +// CHECK-NEXT: [[AGG_TMP_ASCAST_ASCAST:%.*]] = addrspacecast %class.A* [[AGG_TMP_ASCAST]] to [[CLASS_A]] addrspace(5)* +// CHECK-NEXT: call void @_Z22func_with_indirect_arg1A([[CLASS_A]] addrspace(5)* [[AGG_TMP_ASCAST_ASCAST]]) +// CHECK-NEXT: call void @_ZN1AD1Ev(%class.A* nonnull align 4 dereferenceable(4) [[AGG_TMP_ASCAST]]) +// CHECK-NEXT: call void @_Z17func_with_ref_argR1A(%class.A* nonnull align 4 dereferenceable(4) [[A_ASCAST]]) +// CHECK-NEXT: call void @_ZN1AD1Ev(%class.A* nonnull align 4 dereferenceable(4) [[A_ASCAST]]) +// CHECK-NEXT: ret void +// void test_indirect_arg_auto() { A a; func_with_indirect_arg(a); func_with_ref_arg(a); } -// CHECK: define{{.*}} void @_Z24test_indirect_arg_globalv() -// CHECK: %agg.tmp = alloca %class.A, align 4, addrspace(5) -// 
CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -// CHECK: %[[r2:.+]] = addrspacecast %class.A* %[[r0]] to %class.A addrspace(5)* -// CHECK: call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r2]]) -// CHECK: call void @_ZN1AD1Ev(%class.A* {{[^,]*}} %[[r0]]) -// CHECK: call void @_Z17func_with_ref_argR1A(%class.A* nonnull align 4 dereferenceable(4) addrspacecast (%class.A addrspace(1)* @g_a to %class.A*)) +// CHECK-LABEL: @_Z24test_indirect_arg_globalv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_A:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast [[CLASS_A]] addrspace(5)* [[AGG_TMP]] to %class.A* +// CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.A* [[AGG_TMP_ASCAST]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 addrspacecast (i8 addrspace(1)* bitcast ([[CLASS_A]] addrspace(1)* @g_a to i8 addrspace(1)*) to i8*), i64 4, i1 false) +// CHECK-NEXT: [[AGG_TMP_ASCAST_ASCAST:%.*]] = addrspacecast %class.A* [[AGG_TMP_ASCAST]] to [[CLASS_A]] addrspace(5)* +// CHECK-NEXT: call void @_Z22func_with_indirect_arg1A([[CLASS_A]] addrspace(5)* [[AGG_TMP_ASCAST_ASCAST]]) +// CHECK-NEXT: call void @_ZN1AD1Ev(%class.A* nonnull align 4 dereferenceable(4) [[AGG_TMP_ASCAST]]) +// CHECK-NEXT: call void @_Z17func_with_ref_argR1A(%class.A* nonnull align 4 dereferenceable(4) addrspacecast ([[CLASS_A]] addrspace(1)* @g_a to %class.A*)) +// CHECK-NEXT: ret void +// void test_indirect_arg_global() { func_with_indirect_arg(g_a); func_with_ref_arg(g_a); } -// CHECK-LABEL: define{{.*}} void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval(%class.B) align 4 %b) -// CHECK: %p = alloca %class.B*, align 8, addrspace(5) -// CHECK: %[[r1:.+]] = addrspacecast %class.B* addrspace(5)* %p to %class.B** -// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B* -// CHECK: store %class.B* %[[r0]], %class.B** %[[r1]], align 8 +// CHECK-LABEL: @_Z19func_with_byval_arg1B( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[P:%.*]] = alloca %class.B*, align 8, addrspace(5) +// CHECK-NEXT: [[P_ASCAST:%.*]] = addrspacecast %class.B* addrspace(5)* [[P]] to %class.B** +// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast [[CLASS_B:%.*]] addrspace(5)* [[B:%.*]] to %class.B* +// CHECK-NEXT: store %class.B* [[B_ASCAST]], %class.B** [[P_ASCAST]], align 8 +// CHECK-NEXT: ret void +// void func_with_byval_arg(B b) { B *p = &b; } -// CHECK-LABEL: define{{.*}} void @_Z19test_byval_arg_autov() -// CHECK: %b = alloca %class.B, align 4, addrspace(5) -// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B* -// CHECK: %agg.tmp = alloca %class.B, align 4, addrspace(5) -// CHECK: %[[r1:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -// CHECK: %[[r4:.+]] = addrspacecast %class.B* %[[r1]] to %class.B addrspace(5)* -// CHECK: call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval(%class.B) align 4 %[[r4]]) -// CHECK: call void @_Z17func_with_ref_argR1B(%class.B* nonnull align 4 dereferenceable(400) %[[r0]]) +// CHECK-LABEL: @_Z19test_byval_arg_autov( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[B:%.*]] = alloca [[CLASS_B:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast [[CLASS_B]] addrspace(5)* [[B]] to %class.B* +// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_B]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = 
addrspacecast [[CLASS_B]] addrspace(5)* [[AGG_TMP]] to %class.B* +// CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.B* [[AGG_TMP_ASCAST]] to i8* +// CHECK-NEXT: [[TMP1:%.*]] = bitcast %class.B* [[B_ASCAST]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[TMP1]], i64 400, i1 false) +// CHECK-NEXT: [[AGG_TMP_ASCAST_ASCAST:%.*]] = addrspacecast %class.B* [[AGG_TMP_ASCAST]] to [[CLASS_B]] addrspace(5)* +// CHECK-NEXT: call void @_Z19func_with_byval_arg1B([[CLASS_B]] addrspace(5)* byval([[CLASS_B]]) align 4 [[AGG_TMP_ASCAST_ASCAST]]) +// CHECK-NEXT: call void @_Z17func_with_ref_argR1B(%class.B* nonnull align 4 dereferenceable(400) [[B_ASCAST]]) +// CHECK-NEXT: ret void +// void test_byval_arg_auto() { B b; func_with_byval_arg(b); func_with_ref_arg(b); } -// CHECK-LABEL: define{{.*}} void @_Z21test_byval_arg_globalv() -// CHECK: %agg.tmp = alloca %class.B, align 4, addrspace(5) -// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -// CHECK: %[[r2:.+]] = addrspacecast %class.B* %[[r0]] to %class.B addrspace(5)* -// CHECK: call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval(%class.B) align 4 %[[r2]]) -// CHECK: call void @_Z17func_with_ref_argR1B(%class.B* nonnull align 4 dereferenceable(400) addrspacecast (%class.B addrspace(1)* @g_b to %class.B*)) +// CHECK-LABEL: @_Z21test_byval_arg_globalv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_B:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast [[CLASS_B]] addrspace(5)* [[AGG_TMP]] to %class.B* +// CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.B* [[AGG_TMP_ASCAST]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 addrspacecast (i8 addrspace(1)* bitcast ([[CLASS_B]] addrspace(1)* @g_b to i8 addrspace(1)*) to i8*), i64 400, i1 false) +// CHECK-NEXT: [[AGG_TMP_ASCAST_ASCAST:%.*]] = addrspacecast %class.B* [[AGG_TMP_ASCAST]] to [[CLASS_B]] addrspace(5)* +// CHECK-NEXT: call void @_Z19func_with_byval_arg1B([[CLASS_B]] addrspace(5)* byval([[CLASS_B]]) align 4 [[AGG_TMP_ASCAST_ASCAST]]) +// CHECK-NEXT: call void @_Z17func_with_ref_argR1B(%class.B* nonnull align 4 dereferenceable(400) addrspacecast ([[CLASS_B]] addrspace(1)* @g_b to %class.B*)) +// CHECK-NEXT: ret void +// void test_byval_arg_global() { func_with_byval_arg(g_b); func_with_ref_arg(g_b); diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp --- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp +++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp @@ -1,265 +1,352 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \ // RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s +// CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32** +// CHECK-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[RES]] to i32* +// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load 
i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* [[TMP0]], i32 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP3]], i32* [[RES_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* [[TMP4]], i32 [[TMP6]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP7]], i32* [[RES_ASCAST]], align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) { - // CHECK-LABEL: test_non_volatile_parameter32 __UINT32_TYPE__ res; - // CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5) - // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32** - // CHECK-NEXT: %res = alloca i32, align 4, addrspace(5) - // CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32* - // CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %2 = load i32, i32* %1, align 4 - // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4 res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %5 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %6 = load i32, i32* %5, align 4 - // CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4 res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z29test_non_volatile_parameter64Py( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64** +// CHECK-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[RES]] to i64* +// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* [[TMP0]], i64 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP3]], i64* [[RES_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* [[TMP4]], i64 [[TMP6]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP7]], i64* [[RES_ASCAST]], align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) { - // CHECK-LABEL: test_non_volatile_parameter64 __UINT64_TYPE__ res; - // 
CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5) - // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64** - // CHECK-NEXT: %res = alloca i64, align 8, addrspace(5) - // CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64* - // CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %2 = load i64, i64* %1, align 8 - // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8 res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %6 = load i64, i64* %5, align 8 - // CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8 res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z25test_volatile_parameter32PVj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i32* addrspace(5)* [[PTR_ADDR]] to i32** +// CHECK-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[RES]] to i32* +// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* [[TMP0]], i32 [[TMP2]], i32 7, i32 2, i1 true) +// CHECK-NEXT: store i32 [[TMP3]], i32* [[RES_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load volatile i32, i32* [[TMP5]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* [[TMP4]], i32 [[TMP6]], i32 7, i32 2, i1 true) +// CHECK-NEXT: store i32 [[TMP7]], i32* [[RES_ASCAST]], align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ *ptr) { - // CHECK-LABEL: test_volatile_parameter32 __UINT32_TYPE__ res; - // CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5) - // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32** - // CHECK-NEXT: %res = alloca i32, align 4, addrspace(5) - // CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32* - // CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %2 = load volatile i32, i32* %1, align 4 - // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 true) - // CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4 res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %5 = load i32*, i32** 
%ptr.addr.ascast, align 8 - // CHECK-NEXT: %6 = load volatile i32, i32* %5, align 4 - // CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 true) - // CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4 res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z25test_volatile_parameter64PVy( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i64*, align 8, addrspace(5) +// CHECK-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast i64* addrspace(5)* [[PTR_ADDR]] to i64** +// CHECK-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[RES_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[RES]] to i64* +// CHECK-NEXT: store i64* [[PTR:%.*]], i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load volatile i64, i64* [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* [[TMP0]], i64 [[TMP2]], i32 7, i32 2, i1 true) +// CHECK-NEXT: store i64 [[TMP3]], i64* [[RES_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i64*, i64** [[PTR_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load volatile i64, i64* [[TMP5]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* [[TMP4]], i64 [[TMP6]], i32 7, i32 2, i1 true) +// CHECK-NEXT: store i64 [[TMP7]], i64* [[RES_ASCAST]], align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ *ptr) { - // CHECK-LABEL: test_volatile_parameter64 __UINT64_TYPE__ res; - // CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5) - // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64** - // CHECK-NEXT: %res = alloca i64, align 8, addrspace(5) - // CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64* - // CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %2 = load volatile i64, i64* %1, align 8 - // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 true) - // CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8 res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8 - // CHECK-NEXT: %6 = load volatile i64, i64* %5, align 8 - // CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 true) - // CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8 res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z13test_shared32v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* 
addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_shared32() { - // CHECK-LABEL: test_shared32 __attribute__((shared)) __UINT32_TYPE__ val; - // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 - // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 - // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4 val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z13test_shared64v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_shared64() { - // CHECK-LABEL: test_shared64 __attribute__((shared)) __UINT64_TYPE__ val; - // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 - // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 - // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8 val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup"); } __attribute__((device)) __UINT32_TYPE__ global_val32; +// 
CHECK-LABEL: @_Z13test_global32v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_global32() { - // CHECK-LABEL: test_global32 - // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 - // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 - // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4 global_val32 = __builtin_amdgcn_atomic_dec32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup"); } __attribute__((device)) __UINT64_TYPE__ global_val64; +// CHECK-LABEL: @_Z13test_global64v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_global64() { - // CHECK-LABEL: test_global64 - // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 - // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 - // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* 
@global_val64 to i64*), i64 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8 global_val64 = __builtin_amdgcn_atomic_dec64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup"); } __attribute__((constant)) __UINT32_TYPE__ cval32; +// CHECK-LABEL: @_Z15test_constant32v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[LOCAL_VAL]] to i32* +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP1]], i32* [[LOCAL_VAL_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP3]], i32* [[LOCAL_VAL_ASCAST]], align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_constant32() { - // CHECK-LABEL: test_constant32 __UINT32_TYPE__ local_val; - // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4 - // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %1, i32* %local_val.ascast, align 4 local_val = __builtin_amdgcn_atomic_inc32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4 - // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i32 %3, i32* %local_val.ascast, align 4 local_val = __builtin_amdgcn_atomic_dec32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup"); } __attribute__((constant)) __UINT64_TYPE__ cval64; +// CHECK-LABEL: @_Z15test_constant64v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast i64 addrspace(5)* [[LOCAL_VAL]] to i64* +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 [[TMP0]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP1]], i64* [[LOCAL_VAL_ASCAST]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8 +// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i64 [[TMP3]], i64* [[LOCAL_VAL_ASCAST]], align 8 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_constant64() { - // CHECK-LABEL: test_constant64 __UINT64_TYPE__ local_val; - // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8 - // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %0, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %1, 
i64* %local_val.ascast, align 8 local_val = __builtin_amdgcn_atomic_inc64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8 - // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %2, i32 7, i32 2, i1 false) - // CHECK-NEXT: store i64 %3, i64* %local_val.ascast, align 8 local_val = __builtin_amdgcn_atomic_dec64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup"); } +// CHECK-LABEL: @_Z12test_order32v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP0]], i32 2, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP2]], i32 4, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP4]], i32 4, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP5]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP6]], i32 5, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP7]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP8]], i32 6, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP9]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 [[TMP10]], i32 7, i32 2, i1 false) +// CHECK-NEXT: store i32 [[TMP11]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), align 4 +// CHECK-NEXT: ret void +// __attribute__((device)) void test_order32() { - // CHECK-LABEL: test_order32 __attribute__((shared)) __UINT32_TYPE__ val; - // CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 {{.*}}, i32 2, i32 2, i1 false) val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_RELAXED, "workgroup"); - // CHECK: call i32 
   val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_CONSUME, "workgroup");
-  // CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 {{.*}}, i32 4, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, "workgroup");
-  // CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 {{.*}}, i32 5, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_RELEASE, "workgroup");
-  // CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 {{.*}}, i32 6, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQ_REL, "workgroup");
-  // CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 {{.*}}, i32 7, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
 }
+// CHECK-LABEL: @_Z12test_order64v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP0]], i32 2, i32 2, i1 false)
+// CHECK-NEXT: store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP2]], i32 4, i32 2, i1 false)
+// CHECK-NEXT: store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP4]], i32 4, i32 2, i1 false)
+// CHECK-NEXT: store i64 [[TMP5]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP6]], i32 5, i32 2, i1 false)
+// CHECK-NEXT: store i64 [[TMP7]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP8]], i32 6, i32 2, i1 false)
+// CHECK-NEXT: store i64 [[TMP9]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 [[TMP10]], i32 7, i32 2, i1 false)
+// CHECK-NEXT: store i64 [[TMP11]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), align 8
+// CHECK-NEXT: ret void
+//
 __attribute__((device)) void test_order64() {
-  // CHECK-LABEL: test_order64
   __attribute__((shared)) __UINT64_TYPE__ val;
-  // CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 2, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_RELAXED, "workgroup");
-  // CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 4, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_CONSUME, "workgroup");
-  // CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 4, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, "workgroup");
-  // CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 5, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_RELEASE, "workgroup");
-  // CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 6, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQ_REL, "workgroup");
-  // CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 {{.*}}, i32 7, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
 }
+// CHECK-LABEL: @_Z12test_scope32v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP0]], i32 7, i32 1, i1 false)
+// CHECK-NEXT: store i32 [[TMP1]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
+// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP2]], i32 7, i32 2, i1 false)
+// CHECK-NEXT: store i32 [[TMP3]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
+// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP4]], i32 7, i32 3, i1 false)
+// CHECK-NEXT: store i32 [[TMP5]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
+// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 [[TMP6]], i32 7, i32 4, i1 false)
+// CHECK-NEXT: store i32 [[TMP7]], i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), align 4
+// CHECK-NEXT: ret void
+//
 __attribute__((device)) void test_scope32() {
-  // CHECK-LABEL: test_scope32
   __attribute__((shared)) __UINT32_TYPE__ val;
-  // CHECK: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %0, i32 7, i32 1, i1 false)
   val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "");
-  // CHECK: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
-  // CHECK: %5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %4, i32 7, i32 3, i1 false)
   val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "agent");
-  // CHECK: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %6, i32 7, i32 4, i1 false)
   val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "wavefront");
 }
+// CHECK-LABEL: @_Z12test_scope64v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP0]], i32 7, i32 1, i1 false)
+// CHECK-NEXT: store i64 [[TMP1]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP2]], i32 7, i32 2, i1 false)
+// CHECK-NEXT: store i64 [[TMP3]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP4]], i32 7, i32 3, i1 false)
+// CHECK-NEXT: store i64 [[TMP5]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
+// CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 [[TMP6]], i32 7, i32 4, i1 false)
+// CHECK-NEXT: store i64 [[TMP7]], i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), align 8
+// CHECK-NEXT: ret void
+//
 __attribute__((device)) void test_scope64() {
-  // CHECK-LABEL: test_scope64
   __attribute__((shared)) __UINT64_TYPE__ val;
-  // CHECK: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %0, i32 7, i32 1, i1 false)
   val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "");
-  // CHECK: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false)
   val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
__ATOMIC_SEQ_CST, "workgroup"); - // CHECK: %5 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %4, i32 7, i32 3, i1 false) val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "agent"); - // CHECK: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %6, i32 7, i32 4, i1 false) val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "wavefront"); } diff --git a/clang/test/CodeGenSYCL/address-space-deduction.cpp b/clang/test/CodeGenSYCL/address-space-deduction.cpp --- a/clang/test/CodeGenSYCL/address-space-deduction.cpp +++ b/clang/test/CodeGenSYCL/address-space-deduction.cpp @@ -1,73 +1,129 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple spir64 -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s -// CHECK: @_ZZ4testvE3foo = internal addrspace(1) constant i32 66, align 4 -// CHECK: @[[STR:[.a-zA-Z0-9_]+]] = private unnamed_addr addrspace(1) constant [14 x i8] c"Hello, world!\00", align 1 -// CHECK-LABEL: @_Z4testv +// CHECK-LABEL: @_Z4testv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast i32* [[I]] to i32 addrspace(4)* +// CHECK-NEXT: [[PPTR:%.*]] = alloca i32 addrspace(4)*, align 8 +// CHECK-NEXT: [[PPTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[PPTR]] to i32 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[IS_I_PTR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[IS_I_PTR_ASCAST:%.*]] = addrspacecast i8* [[IS_I_PTR]] to i8 addrspace(4)* +// CHECK-NEXT: [[VAR23:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[VAR23_ASCAST:%.*]] = addrspacecast i32* [[VAR23]] to i32 addrspace(4)* +// CHECK-NEXT: [[CP:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[CP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CP]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[ARR:%.*]] = alloca [42 x i32], align 4 +// CHECK-NEXT: [[ARR_ASCAST:%.*]] = addrspacecast [42 x i32]* [[ARR]] to [42 x i32] addrspace(4)* +// CHECK-NEXT: [[CPP:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[CPP_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[CPP]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[APTR:%.*]] = alloca i32 addrspace(4)*, align 8 +// CHECK-NEXT: [[APTR_ASCAST:%.*]] = addrspacecast i32 addrspace(4)** [[APTR]] to i32 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[STR:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[STR]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[PHI_STR:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[PHI_STR_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[PHI_STR]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[SELECT_NULL:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[SELECT_NULL_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_NULL]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[SELECT_STR_TRIVIAL1:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[SELECT_STR_TRIVIAL1_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_STR_TRIVIAL1]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[SELECT_STR_TRIVIAL2:%.*]] = alloca i8 addrspace(4)*, align 8 +// CHECK-NEXT: [[SELECT_STR_TRIVIAL2_ASCAST:%.*]] = addrspacecast i8 addrspace(4)** [[SELECT_STR_TRIVIAL2]] to i8 addrspace(4)* addrspace(4)* +// CHECK-NEXT: store i32 0, i32 addrspace(4)* 
+// CHECK-NEXT: store i32 addrspace(4)* [[I_ASCAST]], i32 addrspace(4)* addrspace(4)* [[PPTR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[PPTR_ASCAST]], align 8
+// CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 addrspace(4)* [[TMP0]], [[I_ASCAST]]
+// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[CMP]] to i8
+// CHECK-NEXT: store i8 [[FROMBOOL]], i8 addrspace(4)* [[IS_I_PTR_ASCAST]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[PPTR_ASCAST]], align 8
+// CHECK-NEXT: store i32 66, i32 addrspace(4)* [[TMP1]], align 4
+// CHECK-NEXT: store i32 23, i32 addrspace(4)* [[VAR23_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 addrspace(4)* [[VAR23_ASCAST]] to i8 addrspace(4)*
+// CHECK-NEXT: store i8 addrspace(4)* [[TMP2]], i8 addrspace(4)* addrspace(4)* [[CP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[CP_ASCAST]], align 8
+// CHECK-NEXT: store i8 41, i8 addrspace(4)* [[TMP3]], align 1
+// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* [[ARR_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 addrspace(4)* [[ARRAYDECAY]] to i8 addrspace(4)*
+// CHECK-NEXT: store i8 addrspace(4)* [[TMP4]], i8 addrspace(4)* addrspace(4)* [[CPP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[CPP_ASCAST]], align 8
+// CHECK-NEXT: store i8 43, i8 addrspace(4)* [[TMP5]], align 1
+// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* [[ARR_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[ARRAYDECAY1]], i64 10
+// CHECK-NEXT: store i32 addrspace(4)* [[ADD_PTR]], i32 addrspace(4)* addrspace(4)* [[APTR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[APTR_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* [[ARR_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i32, i32 addrspace(4)* [[ARRAYDECAY2]], i64 168
+// CHECK-NEXT: [[CMP4:%.*]] = icmp ult i32 addrspace(4)* [[TMP6]], [[ADD_PTR3]]
+// CHECK-NEXT: br i1 [[CMP4]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+// CHECK: if.then:
+// CHECK-NEXT: [[TMP7:%.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[APTR_ASCAST]], align 8
+// CHECK-NEXT: store i32 44, i32 addrspace(4)* [[TMP7]], align 4
+// CHECK-NEXT: br label [[IF_END]]
+// CHECK: if.end:
+// CHECK-NEXT: store i8 addrspace(4)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(4)* addrspacecast ([14 x i8] addrspace(1)* @.str to [14 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* [[STR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[STR_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[TMP8]], i64 0
+// CHECK-NEXT: [[TMP9:%.*]] = load i8, i8 addrspace(4)* [[ARRAYIDX]], align 1
+// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP9]] to i32
+// CHECK-NEXT: store i32 [[CONV]], i32 addrspace(4)* [[I_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32 addrspace(4)* [[I_ASCAST]], align 4
+// CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], 2
+// CHECK-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK: cond.true:
+// CHECK-NEXT: [[TMP11:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[STR_ASCAST]], align 8
+// CHECK-NEXT: br label [[COND_END:%.*]]
+// CHECK: cond.false:
+// CHECK-NEXT: br label [[COND_END]]
+// CHECK: cond.end:
+// CHECK-NEXT: [[COND:%.*]] = phi i8 addrspace(4)* [ [[TMP11]], [[COND_TRUE]] ], [ getelementptr inbounds ([21 x i8], [21 x i8] addrspace(4)* addrspacecast ([21 x i8] addrspace(1)* @.str.1 to [21 x i8] addrspace(4)*), i64 0, i64 0), [[COND_FALSE]] ]
+// CHECK-NEXT: store i8 addrspace(4)* [[COND]], i8 addrspace(4)* addrspace(4)* [[PHI_STR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[I_ASCAST]], align 4
+// CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], 2
+// CHECK-NEXT: [[TMP13:%.*]] = zext i1 [[CMP6]] to i64
+// CHECK-NEXT: [[COND7:%.*]] = select i1 [[CMP6]], i8 addrspace(4)* getelementptr inbounds ([24 x i8], [24 x i8] addrspace(4)* addrspacecast ([24 x i8] addrspace(1)* @.str.2 to [24 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* null
+// CHECK-NEXT: store i8 addrspace(4)* [[COND7]], i8 addrspace(4)* addrspace(4)* [[SELECT_NULL_ASCAST]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[STR_ASCAST]], align 8
+// CHECK-NEXT: store i8 addrspace(4)* [[TMP14]], i8 addrspace(4)* addrspace(4)* [[SELECT_STR_TRIVIAL1_ASCAST]], align 8
+// CHECK-NEXT: store i8 addrspace(4)* getelementptr inbounds ([21 x i8], [21 x i8] addrspace(4)* addrspacecast ([21 x i8] addrspace(1)* @.str.1 to [21 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* [[SELECT_STR_TRIVIAL2_ASCAST]], align 8
+// CHECK-NEXT: ret void
+//
 void test() {
   static const int foo = 0x42;
-  // CHECK: %i.ascast = addrspacecast i32* %i to i32 addrspace(4)*
-  // CHECK: %[[ARR:[a-zA-Z0-9]+]] = alloca [42 x i32]
-  // CHECK: %[[ARR]].ascast = addrspacecast [42 x i32]* %[[ARR]] to [42 x i32] addrspace(4)*
   int i = 0;
   int *pptr = &i;
-  // CHECK: store i32 addrspace(4)* %i.ascast, i32 addrspace(4)* addrspace(4)* %pptr.ascast
   bool is_i_ptr = (pptr == &i);
-  // CHECK: %[[VALPPTR:[0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %pptr.ascast
-  // CHECK: %cmp{{[0-9]*}} = icmp eq i32 addrspace(4)* %[[VALPPTR]], %i.ascast
   *pptr = foo;
   int var23 = 23;
   char *cp = (char *)&var23;
   *cp = 41;
-  // CHECK: store i32 23, i32 addrspace(4)* %[[VAR:[a-zA-Z0-9.]+]]
-  // CHECK: [[VARCAST:%.*]] = bitcast i32 addrspace(4)* %[[VAR]] to i8 addrspace(4)*
-  // CHECK: store i8 addrspace(4)* [[VARCAST]], i8 addrspace(4)* addrspace(4)* %{{.*}}
   int arr[42];
   char *cpp = (char *)arr;
   *cpp = 43;
-  // CHECK: [[ARRDECAY:%.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* %[[ARR]].ascast, i64 0, i64 0
-  // CHECK: [[ARRCAST:%.*]] = bitcast i32 addrspace(4)* [[ARRDECAY]] to i8 addrspace(4)*
-  // CHECK: store i8 addrspace(4)* [[ARRCAST]], i8 addrspace(4)* addrspace(4)* %{{.*}}
   int *aptr = arr + 10;
   if (aptr < arr + sizeof(arr)) *aptr = 44;
-  // CHECK: %[[VALAPTR:.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %aptr.ascast
-  // CHECK: %[[ARRDCY2:.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* %[[ARR]].ascast, i64 0, i64 0
-  // CHECK: %[[ADDPTR:.*]] = getelementptr inbounds i32, i32 addrspace(4)* %[[ARRDCY2]], i64 168
-  // CHECK: %cmp{{[0-9]+}} = icmp ult i32 addrspace(4)* %[[VALAPTR]], %[[ADDPTR]]
   const char *str = "Hello, world!";
-  // CHECK: store i8 addrspace(4)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(4)* addrspacecast ([14 x i8] addrspace(1)* @[[STR]] to [14 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* %[[STRVAL:[a-zA-Z0-9]+]].ascast, align 8
   i = str[0];
   const char *phi_str = i > 2 ? str : "Another hello world!";
   (void)phi_str;
-  // CHECK: %[[COND:[a-zA-Z0-9]+]] = icmp sgt i32 %{{.*}}, 2
-  // CHECK: br i1 %[[COND]], label %[[CONDTRUE:[.a-zA-Z0-9]+]], label %[[CONDFALSE:[.a-zA-Z0-9]+]]
-  // CHECK: [[CONDTRUE]]:
-  // CHECK-NEXT: %[[VALTRUE:[a-zA-Z0-9]+]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %str.ascast
-  // CHECK-NEXT: br label %[[CONDEND:[.a-zA-Z0-9]+]]
-  // CHECK: [[CONDFALSE]]:
-  // CHECK: [[CONDEND]]:
-  // CHECK-NEXT: phi i8 addrspace(4)* [ %[[VALTRUE]], %[[CONDTRUE]] ], [ getelementptr inbounds ([21 x i8], [21 x i8] addrspace(4)* addrspacecast ([21 x i8] addrspace(1)* @{{.*}} to [21 x i8] addrspace(4)*), i64 0, i64 0), %[[CONDFALSE]] ]
   const char *select_null = i > 2 ? "Yet another Hello world" : nullptr;
   (void)select_null;
-  // CHECK: select i1 %{{.*}}, i8 addrspace(4)* getelementptr inbounds ([24 x i8], [24 x i8] addrspace(4)* addrspacecast ([24 x i8] addrspace(1)* @{{.*}} to [24 x i8] addrspace(4)*), i64 0, i64 0)
   const char *select_str_trivial1 = true ? str : "Another hello world!";
   (void)select_str_trivial1;
-  // CHECK: %[[TRIVIALTRUE:[a-zA-Z0-9]+]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[STRVAL]]
-  // CHECK: store i8 addrspace(4)* %[[TRIVIALTRUE]], i8 addrspace(4)* addrspace(4)* %{{.*}}, align 8
   const char *select_str_trivial2 = false ? str : "Another hello world!";
   (void)select_str_trivial2;