Index: include/clang/AST/ASTContext.h
===================================================================
--- include/clang/AST/ASTContext.h
+++ include/clang/AST/ASTContext.h
@@ -2466,6 +2466,8 @@
   unsigned getTargetAddressSpace(LangAS AS) const;
 
+  LangAS getLangASForBuiltinAddressSpace(unsigned AS) const;
+
   /// Get target-dependent integer value for null pointer which is used for
   /// constant folding.
   uint64_t getTargetNullPointerValue(QualType QT) const;
Index: include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -21,9 +21,9 @@
 // SI+ only builtins.
 //===----------------------------------------------------------------------===//
 
-BUILTIN(__builtin_amdgcn_dispatch_ptr, "Uc*4", "nc")
-BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*4", "nc")
-BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*4", "nc")
+BUILTIN(__builtin_amdgcn_dispatch_ptr, "v*4", "nc")
+BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "v*4", "nc")
+BUILTIN(__builtin_amdgcn_implicitarg_ptr, "v*4", "nc")
 
 BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc")
 BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc")
@@ -45,6 +45,8 @@
 BUILTIN(__builtin_amdgcn_wave_barrier, "v", "n")
 BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
 BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
+
+// FIXME: Need to disallow constant address space.
 BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
 BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")
 BUILTIN(__builtin_amdgcn_div_fmas, "ddddb", "nc")
@@ -93,9 +95,9 @@
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "ffff", "nc")
-BUILTIN(__builtin_amdgcn_ds_faddf, "ff*fIiIiIb", "n")
-BUILTIN(__builtin_amdgcn_ds_fminf, "ff*fIiIiIb", "n")
-BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*fIiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_faddf, "ff*3fIiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n")
 
 //===----------------------------------------------------------------------===//
 // VI+ only builtins.
Index: include/clang/Basic/TargetInfo.h
===================================================================
--- include/clang/Basic/TargetInfo.h
+++ include/clang/Basic/TargetInfo.h
@@ -1167,6 +1167,18 @@
   const LangASMap &getAddressSpaceMap() const { return *AddrSpaceMap; }
 
+  /// Map from the address space field in builtin description strings to the
+  /// language address space.
+  virtual LangAS getOpenCLBuiltinAddressSpace(unsigned AS) const {
+    return getLangASFromTargetAS(AS);
+  }
+
+  /// Map from the address space field in builtin description strings to the
+  /// language address space.
+  virtual LangAS getCUDABuiltinAddressSpace(unsigned AS) const {
+    return getLangASFromTargetAS(AS);
+  }
+
   /// Return an AST address space which can be used opportunistically
   /// for constant global memory. It must be possible to convert pointers into
   /// this address space to LangAS::Default. If no such address space exists,
Index: lib/AST/ASTContext.cpp
===================================================================
--- lib/AST/ASTContext.cpp
+++ lib/AST/ASTContext.cpp
@@ -9352,9 +9352,11 @@
   // qualified with an address space.
   char *End;
   unsigned AddrSpace = strtoul(Str, &End, 10);
-  if (End != Str && AddrSpace != 0) {
-    Type = Context.getAddrSpaceQualType(Type,
-                                        getLangASFromTargetAS(AddrSpace));
+  if (End != Str) {
+    // Note AddrSpace == 0 is not the same as an unspecified address space.
+    Type = Context.getAddrSpaceQualType(
+      Type,
+      Context.getLangASForBuiltinAddressSpace(AddrSpace));
     Str = End;
   }
   if (c == '*')
@@ -10311,6 +10313,16 @@
   }
 }
 
+LangAS ASTContext::getLangASForBuiltinAddressSpace(unsigned AS) const {
+  if (LangOpts.OpenCL)
+    return getTargetInfo().getOpenCLBuiltinAddressSpace(AS);
+
+  if (LangOpts.CUDA)
+    return getTargetInfo().getCUDABuiltinAddressSpace(AS);
+
+  return getLangASFromTargetAS(AS);
+}
+
 // Explicitly instantiate this in case a Redeclarable<T> is used from a TU that
 // doesn't include ASTContext.h
 template
Index: lib/Basic/Targets/AMDGPU.h
===================================================================
--- lib/Basic/Targets/AMDGPU.h
+++ lib/Basic/Targets/AMDGPU.h
@@ -378,6 +378,27 @@
     }
   }
 
+  LangAS getOpenCLBuiltinAddressSpace(unsigned AS) const override {
+    switch (AS) {
+    case 0:
+      return LangAS::opencl_generic;
+    case 1:
+      return LangAS::opencl_global;
+    case 3:
+      return LangAS::opencl_local;
+    case 4:
+      return LangAS::opencl_constant;
+    case 5:
+      return LangAS::opencl_private;
+    default:
+      return getLangASFromTargetAS(AS);
+    }
+  }
+
+  LangAS getCUDABuiltinAddressSpace(unsigned AS) const override {
+    return LangAS::Default;
+  }
+
   llvm::Optional<LangAS> getConstantAddressSpace() const override {
     return getLangASFromTargetAS(Constant);
   }
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -3703,6 +3703,16 @@
       // we need to do a bit cast.
       llvm::Type *PTy = FTy->getParamType(i);
       if (PTy != ArgValue->getType()) {
+        // XXX - vector of pointers?
+        if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
+          if (PtrTy->getAddressSpace() !=
+              ArgValue->getType()->getPointerAddressSpace()) {
+            ArgValue = Builder.CreateAddrSpaceCast(
+              ArgValue,
+              ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace()));
+          }
+        }
+
         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
                "Must be able to losslessly bit cast to param");
         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
@@ -3719,6 +3729,14 @@
     RetTy = ConvertType(BuiltinRetType);
 
   if (RetTy != V->getType()) {
+    // XXX - vector of pointers?
+    if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
+      if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
+        V = Builder.CreateAddrSpaceCast(
+          V, V->getType()->getPointerTo(PtrTy->getAddressSpace()));
+      }
+    }
+
     assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
            "Must be able to losslessly bit cast result type");
     V = Builder.CreateBitCast(V, RetTy);
@@ -11039,50 +11057,6 @@
     CI->setConvergent();
     return CI;
   }
-  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
-  case AMDGPU::BI__builtin_amdgcn_ds_fminf:
-  case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
-    llvm::SmallVector<llvm::Value *, 5> Args;
-    for (unsigned I = 0; I != 5; ++I)
-      Args.push_back(EmitScalarExpr(E->getArg(I)));
-    const llvm::Type *PtrTy = Args[0]->getType();
-    // check pointer parameter
-    if (!PtrTy->isPointerTy() ||
-        E->getArg(0)
-                ->getType()
-                ->getPointeeType()
-                .getQualifiers()
-                .getAddressSpace() != LangAS::opencl_local ||
-        !PtrTy->getPointerElementType()->isFloatTy()) {
-      CGM.Error(E->getArg(0)->getLocStart(),
-                "parameter should have type \"local float*\"");
-      return nullptr;
-    }
-    // check float parameter
-    if (!Args[1]->getType()->isFloatTy()) {
-      CGM.Error(E->getArg(1)->getLocStart(),
-                "parameter should have type \"float\"");
-      return nullptr;
-    }
-
-    Intrinsic::ID ID;
-    switch (BuiltinID) {
-    case AMDGPU::BI__builtin_amdgcn_ds_faddf:
-      ID = Intrinsic::amdgcn_ds_fadd;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_ds_fminf:
-      ID = Intrinsic::amdgcn_ds_fmin;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
-      ID = Intrinsic::amdgcn_ds_fmax;
-      break;
-    default:
-      llvm_unreachable("Unknown BuiltinID");
-    }
-    Value *F = CGM.getIntrinsic(ID);
-    return Builder.CreateCall(F, Args);
-  }
-
   // amdgcn workitem
   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
Index: lib/Sema/SemaExpr.cpp
===================================================================
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -5153,10 +5153,13 @@
       continue;
     }
 
+    QualType PointeeType = ParamType->getPointeeType();
+    if (PointeeType.getQualifiers().hasAddressSpace())
+      continue;
+
     NeedsNewDecl = true;
     LangAS AS = ArgType->getPointeeType().getAddressSpace();
 
-    QualType PointeeType = ParamType->getPointeeType();
     PointeeType = Context.getAddrSpaceQualType(PointeeType, AS);
     OverloadParams.push_back(Context.getPointerType(PointeeType));
   }
Index: test/CodeGenCUDA/builtins-amdgcn.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/builtins-amdgcn.cu
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+
+// CHECK-LABEL: @_Z16use_dispatch_ptrPi(
+// CHECK: %2 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+// CHECK: %3 = addrspacecast i8 addrspace(4)* %2 to i8 addrspace(4)**
+__global__ void use_dispatch_ptr(int* out) {
+  const int* dispatch_ptr = (const int*)__builtin_amdgcn_dispatch_ptr();
+  *out = *dispatch_ptr;
+}
+
+// CHECK-LABEL: @_Z12test_ds_fmaxf(
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float %2, i32 0, i32 0, i1 false)
+__global__
+void test_ds_fmax(float src) {
+  __shared__ float shared;
+  volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false);
+}
Index: test/CodeGenOpenCL/builtins-amdgcn.cl
===================================================================
--- test/CodeGenOpenCL/builtins-amdgcn.cl
+++ test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -1,6 +1,5 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown-opencl -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
@@ -20,19 +19,42 @@
   *flagout = flag;
 }
 
-// CHECK-LABEL: @test_div_scale_f32
+// CHECK-LABEL: @test_div_scale_f32(
 // CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
 // CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
 // CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
-// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
-// CHECK: store i32 [[FLAGEXT]]
-void test_div_scale_f32(global float* out, global int* flagout, float a, float b)
+// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i8
+// CHECK: store i8 [[FLAGEXT]]
+void test_div_scale_f32(global float* out, global bool* flagout, float a, float b)
 {
   bool flag;
   *out = __builtin_amdgcn_div_scalef(a, b, true, &flag);
   *flagout = flag;
 }
 
+// CHECK-LABEL: @test_div_scale_f32_global_ptr(
+// CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
+// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
+// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
+// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i8
+// CHECK: store i8 [[FLAGEXT]]
+void test_div_scale_f32_global_ptr(global float* out, global int* flagout, float a, float b, global bool* flag)
+{
+  *out = __builtin_amdgcn_div_scalef(a, b, true, flag);
+}
+
+// CHECK-LABEL: @test_div_scale_f32_generic_ptr(
+// CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
+// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
+// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
+// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i8
+// CHECK: store i8 [[FLAGEXT]]
+void test_div_scale_f32_generic_ptr(global float* out, global int* flagout, float a, float b, global bool* flag_arg)
+{
+  generic bool* flag = flag_arg;
+  *out = __builtin_amdgcn_div_scalef(a, b, true, flag);
+}
+
 // CHECK-LABEL: @test_div_fmas_f32
 // CHECK: call float @llvm.amdgcn.div.fmas.f32
 void test_div_fmas_f32(global float* out, float a, float b, float c, int d)
@@ -414,42 +436,42 @@
 }
 
 // CHECK-LABEL: @test_read_exec(
-// CHECK: call i64 @llvm.read_register.i64(metadata ![[EXEC:[0-9]+]]) #[[READ_EXEC_ATTRS:[0-9]+]]
+// CHECK: call i64 @llvm.read_register.i64(metadata ![[$EXEC:[0-9]+]]) #[[$READ_EXEC_ATTRS:[0-9]+]]
 void test_read_exec(global ulong* out) {
   *out = __builtin_amdgcn_read_exec();
 }
 
-// CHECK: declare i64 @llvm.read_register.i64(metadata) #[[NOUNWIND_READONLY:[0-9]+]]
+// CHECK: declare i64 @llvm.read_register.i64(metadata) #[[$NOUNWIND_READONLY:[0-9]+]]
 
 // CHECK-LABEL: @test_read_exec_lo(
-// CHECK: call i32 @llvm.read_register.i32(metadata ![[EXEC_LO:[0-9]+]]) #[[READ_EXEC_ATTRS]]
+// CHECK: call i32 @llvm.read_register.i32(metadata ![[$EXEC_LO:[0-9]+]]) #[[$READ_EXEC_ATTRS]]
 void test_read_exec_lo(global uint* out) {
   *out = __builtin_amdgcn_read_exec_lo();
 }
 
 // CHECK-LABEL: @test_read_exec_hi(
-// CHECK: call i32 @llvm.read_register.i32(metadata ![[EXEC_HI:[0-9]+]]) #[[READ_EXEC_ATTRS]]
+// CHECK: call i32 @llvm.read_register.i32(metadata ![[$EXEC_HI:[0-9]+]]) #[[$READ_EXEC_ATTRS]]
 void test_read_exec_hi(global uint* out) {
   *out = __builtin_amdgcn_read_exec_hi();
 }
 
 // CHECK-LABEL: @test_dispatch_ptr
 // CHECK: call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
-void test_dispatch_ptr(__attribute__((address_space(4))) unsigned char ** out)
+void test_dispatch_ptr(__constant unsigned char ** out)
 {
   *out = __builtin_amdgcn_dispatch_ptr();
 }
 
 // CHECK-LABEL: @test_kernarg_segment_ptr
 // CHECK: call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
-void test_kernarg_segment_ptr(__attribute__((address_space(4))) unsigned char ** out)
+void test_kernarg_segment_ptr(__constant unsigned char ** out)
 {
   *out = __builtin_amdgcn_kernarg_segment_ptr();
 }
 
 // CHECK-LABEL: @test_implicitarg_ptr
 // CHECK: call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
-void test_implicitarg_ptr(__attribute__((address_space(4))) unsigned char ** out)
+void test_implicitarg_ptr(__constant unsigned char ** out)
 {
   *out = __builtin_amdgcn_implicitarg_ptr();
 }
@@ -480,9 +502,9 @@
 }
 
 // CHECK-LABEL: @test_get_local_id(
-// CHECK: tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[WI_RANGE:![0-9]*]]
-// CHECK: tail call i32 @llvm.amdgcn.workitem.id.y(), !range [[WI_RANGE]]
-// CHECK: tail call i32 @llvm.amdgcn.workitem.id.z(), !range [[WI_RANGE]]
+// CHECK: tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[$WI_RANGE:![0-9]*]]
+// CHECK: tail call i32 @llvm.amdgcn.workitem.id.y(), !range [[$WI_RANGE]]
+// CHECK: tail call i32 @llvm.amdgcn.workitem.id.z(), !range [[$WI_RANGE]]
 void test_get_local_id(int d, global int *out)
 {
   switch (d) {
@@ -507,9 +529,9 @@
   *out = __builtin_amdgcn_s_getpc();
 }
 
-// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 1024}
-// CHECK-DAG: attributes #[[NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
-// CHECK-DAG: attributes #[[READ_EXEC_ATTRS]] = { convergent }
-// CHECK-DAG: ![[EXEC]] = !{!"exec"}
-// CHECK-DAG: ![[EXEC_LO]] = !{!"exec_lo"}
-// CHECK-DAG: ![[EXEC_HI]] = !{!"exec_hi"}
+// CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
+// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
+// CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }
+// CHECK-DAG: ![[$EXEC]] = !{!"exec"}
+// CHECK-DAG: ![[$EXEC_LO]] = !{!"exec_lo"}
+// CHECK-DAG: ![[$EXEC_HI]] = !{!"exec_hi"}
Index: test/CodeGenOpenCL/numbered-address-space.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/numbered-address-space.cl
@@ -0,0 +1,34 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu tonga -S -emit-llvm -O0 -o - %s | FileCheck %s
+
+// Make sure using numbered address spaces doesn't trigger crashes when a
+// builtin has an address space parameter.
+
+// CHECK-LABEL: @test_numbered_as_to_generic(
+// CHECK: addrspacecast i32 addrspace(42)* %0 to i32*
+void test_numbered_as_to_generic(__attribute__((address_space(42))) int *arbitary_numbered_ptr) {
+  generic int* generic_ptr = arbitary_numbered_ptr;
+  *generic_ptr = 4;
+}
+
+// CHECK-LABEL: @test_numbered_as_to_builtin(
+// CHECK: addrspacecast i32 addrspace(42)* %0 to float addrspace(3)*
+void test_numbered_as_to_builtin(__attribute__((address_space(42))) int *arbitary_numbered_ptr, float src) {
+  volatile float result = __builtin_amdgcn_ds_fmaxf(arbitary_numbered_ptr, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_generic_as_to_builtin_parameter_explicit_cast(
+// CHECK: addrspacecast i32 addrspace(3)* %0 to i32*
+void test_generic_as_to_builtin_parameter_explicit_cast(__local int *local_ptr, float src) {
+  generic int* generic_ptr = local_ptr;
+  volatile float result = __builtin_amdgcn_ds_fmaxf((__local float*) generic_ptr, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_generic_as_to_builtin_parameter_implicit_cast(
+// CHECK: addrspacecast i32* %2 to float addrspace(3)*
+void test_generic_as_to_builtin_parameter_implicit_cast(__local int *local_ptr, float src) {
+  generic int* generic_ptr = local_ptr;
+
+  volatile float result = __builtin_amdgcn_ds_fmaxf(generic_ptr, src, 0, 0, false);
+}
+
Index: test/SemaOpenCL/numbered-address-space.cl
===================================================================
--- /dev/null
+++ test/SemaOpenCL/numbered-address-space.cl
@@ -0,0 +1,31 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -verify -pedantic -fsyntax-only %s
+
+void test_numeric_as_to_generic_implicit_cast(__attribute__((address_space(3))) int *as3_ptr, float src) {
+  generic int* generic_ptr = as3_ptr; // FIXME: This should error
+}
+
+void test_numeric_as_to_generic_explicit_cast(__attribute__((address_space(3))) int *as3_ptr, float src) {
+  generic int* generic_ptr = (generic int*) as3_ptr; // Should maybe be valid?
+}
+
+void test_generic_to_numeric_as_implicit_cast() {
+  generic int* generic_ptr = 0;
+  __attribute__((address_space(3))) int *as3_ptr = generic_ptr; // expected-error{{initializing '__attribute__((address_space(3))) int *' with an expression of type '__generic int *' changes address space of pointer}}
+}
+
+void test_generic_to_numeric_as_explicit_cast() {
+  generic int* generic_ptr = 0;
+  __attribute__((address_space(3))) int *as3_ptr = (__attribute__((address_space(3))) int *)generic_ptr;
+}
+
+void test_generic_as_to_builtin_parameter_explicit_cast_numeric(__attribute__((address_space(3))) int *as3_ptr, float src) {
+  generic int* generic_ptr = as3_ptr; // FIXME: This should error
+  volatile float result = __builtin_amdgcn_ds_fmaxf((__attribute__((address_space(3))) float*) generic_ptr, src, 0, 0, false); // expected-error {{passing '__attribute__((address_space(3))) float *' to parameter of type '__local float *' changes address space of pointer}}
+}
+
+void test_generic_as_to_builtin_parameterimplicit_cast_numeric(__attribute__((address_space(3))) int *as3_ptr, float src) {
+  generic int* generic_ptr = as3_ptr;
+  volatile float result = __builtin_amdgcn_ds_fmaxf(generic_ptr, src, 0, 0, false); // expected-warning {{incompatible pointer types passing '__generic int *' to parameter of type '__local float *'}}
+}
+