Index: include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- include/clang/Basic/BuiltinsAMDGPU.def
+++ include/clang/Basic/BuiltinsAMDGPU.def
@@ -14,6 +14,21 @@
 
 // The format of this database matches clang/Basic/Builtins.def.
 
+//===----------------------------------------------------------------------===//
+// SI+ only builtins.
+//===----------------------------------------------------------------------===//
+
+BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*2", "nc")
+BUILTIN(__builtin_amdgcn_implicitarg_segment_ptr, "Uc*2", "nc")
+
+BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc")
+BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc")
+BUILTIN(__builtin_amdgcn_workgroup_id_z, "Ui", "nc")
+
+BUILTIN(__builtin_amdgcn_workitem_id_x, "Ui", "nc")
+BUILTIN(__builtin_amdgcn_workitem_id_y, "Ui", "nc")
+BUILTIN(__builtin_amdgcn_workitem_id_z, "Ui", "nc")
+
 BUILTIN(__builtin_amdgcn_s_barrier, "v", "n")
 BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
 BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")
@@ -53,6 +68,36 @@
 BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n")
 
 //===----------------------------------------------------------------------===//
+// R600-NI only builtins.
+//===----------------------------------------------------------------------===//
+
+BUILTIN(__builtin_r600_read_workdim, "Ui", "nc")
+
+BUILTIN(__builtin_r600_read_ngroups_x, "Ui", "nc")
+BUILTIN(__builtin_r600_read_ngroups_y, "Ui", "nc")
+BUILTIN(__builtin_r600_read_ngroups_z, "Ui", "nc")
+
+BUILTIN(__builtin_r600_read_global_size_x, "Ui", "nc")
+BUILTIN(__builtin_r600_read_global_size_y, "Ui", "nc")
+BUILTIN(__builtin_r600_read_global_size_z, "Ui", "nc")
+
+BUILTIN(__builtin_r600_read_local_size_x, "Ui", "nc")
+BUILTIN(__builtin_r600_read_local_size_y, "Ui", "nc")
+BUILTIN(__builtin_r600_read_local_size_z, "Ui", "nc")
+
+BUILTIN(__builtin_r600_read_tgid_x, "Ui", "nc")
+BUILTIN(__builtin_r600_read_tgid_y, "Ui", "nc")
+BUILTIN(__builtin_r600_read_tgid_z, "Ui", "nc")
+
+BUILTIN(__builtin_r600_read_tidig_x, "Ui", "nc")
+BUILTIN(__builtin_r600_read_tidig_y, "Ui", "nc")
+BUILTIN(__builtin_r600_read_tidig_z, "Ui", "nc")
+
+BUILTIN(__builtin_r600_read_global_offset_x, "Ui", "nc")
+BUILTIN(__builtin_r600_read_global_offset_y, "Ui", "nc")
+BUILTIN(__builtin_r600_read_global_offset_z, "Ui", "nc")
+
+//===----------------------------------------------------------------------===//
 // Legacy names with amdgpu prefix
 //===----------------------------------------------------------------------===//
 
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -26,6 +26,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
 #include <sstream>
 
 using namespace clang;
@@ -320,6 +321,22 @@
   return CGF.Builder.CreateCall(F, {Src0, Src1});
 }
 
+/// Emit a call to the zero-argument intrinsic \p IntrinsicID and attach !range
+/// metadata built from \p Low and \p High, both encoded as 32-bit values.
+///
+/// LLVM reads !range as the half-open interval [Low, High), so \p High itself
+/// is excluded from the values the call is assumed to produce.
+static Value *emitRangedBuiltin(CodeGenFunction &CGF,
+                                unsigned IntrinsicID,
+                                int Low, int High) {
+  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
+  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, Low), APInt(32, High));
+  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
+  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
+  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
+  return Call;
+}
+
 namespace {
 struct WidthAndSignedness {
   unsigned Width;
@@ -7143,6 +7160,34 @@
       return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
   }
+
+  // amdgcn workitem
+  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
+    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 256);
+  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
+    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 256);
+  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
+    return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 256);
+
+  // r600 workitem
+  case AMDGPU::BI__builtin_r600_read_workdim:
+    return emitRangedBuiltin(*this, Intrinsic::r600_read_workdim, 1, 4);
+
+  // NOTE(review): !range is half-open, so [0, 256) also asserts that a local
+  // size is never exactly 256 -- confirm that bound before relying on it.
+  case AMDGPU::BI__builtin_r600_read_local_size_x:
+    return emitRangedBuiltin(*this, Intrinsic::r600_read_local_size_x, 0, 256);
+  case AMDGPU::BI__builtin_r600_read_local_size_y:
+    return emitRangedBuiltin(*this, Intrinsic::r600_read_local_size_y, 0, 256);
+  case AMDGPU::BI__builtin_r600_read_local_size_z:
+    return emitRangedBuiltin(*this, Intrinsic::r600_read_local_size_z, 0, 256);
+
+  case AMDGPU::BI__builtin_r600_read_tidig_x:
+    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 256);
+  case AMDGPU::BI__builtin_r600_read_tidig_y:
+    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 256);
+  case AMDGPU::BI__builtin_r600_read_tidig_z:
+    return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 256);
   default:
     return nullptr;
   }
Index: test/CodeGenOpenCL/builtins-amdgcn.cl
===================================================================
--- test/CodeGenOpenCL/builtins-amdgcn.cl
+++ test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -275,3 +275,47 @@
 {
   *out = __builtin_amdgpu_ldexp(a, b);
 }
+
+// CHECK-LABEL: @test_kernarg_segment_ptr
+// CHECK: call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
+void test_kernarg_segment_ptr(__attribute__((address_space(2))) unsigned char ** out)
+{
+  *out = __builtin_amdgcn_kernarg_segment_ptr();
+}
+
+// CHECK-LABEL: @test_implicitarg_segment_ptr
+// CHECK: call i8 addrspace(2)* @llvm.amdgcn.implicitarg.segment.ptr()
+void test_implicitarg_segment_ptr(__attribute__((address_space(2))) unsigned char ** out)
+{
+  *out = __builtin_amdgcn_implicitarg_segment_ptr();
+}
+
+// CHECK-LABEL: @test_get_group_id(
+// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.x()
+// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.y()
+// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.z()
+void test_get_group_id(int d, global int *out)
+{
+  switch (d) {
+  case 0: *out = __builtin_amdgcn_workgroup_id_x(); break;
+  case 1: *out = __builtin_amdgcn_workgroup_id_y(); break;
+  case 2: *out = __builtin_amdgcn_workgroup_id_z(); break;
+  default: *out = 0;
+  }
+}
+
+// CHECK-LABEL: @test_get_local_id(
+// CHECK: tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[WI_RANGE:![0-9]*]]
+// CHECK: tail call i32 @llvm.amdgcn.workitem.id.y(), !range [[WI_RANGE]]
+// CHECK: tail call i32 @llvm.amdgcn.workitem.id.z(), !range [[WI_RANGE]]
+void test_get_local_id(int d, global int *out)
+{
+  switch (d) {
+  case 0: *out = __builtin_amdgcn_workitem_id_x(); break;
+  case 1: *out = __builtin_amdgcn_workitem_id_y(); break;
+  case 2: *out = __builtin_amdgcn_workitem_id_z(); break;
+  default: *out = 0;
+  }
+}
+
+// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 256}
Index: test/CodeGenOpenCL/builtins-r600.cl
===================================================================
--- test/CodeGenOpenCL/builtins-r600.cl
+++ test/CodeGenOpenCL/builtins-r600.cl
@@ -30,3 +30,97 @@
 {
   *out = __builtin_amdgpu_ldexp(a, b);
 }
+
+// CHECK-LABEL: @test_get_work_dim(
+// CHECK: tail call i32 @llvm.r600.read.workdim(), !range [[DIM_RANGE:![0-9]*]]
+void test_get_work_dim(global int* out)
+{
+  *out = __builtin_r600_read_workdim();
+}
+
+// CHECK-LABEL: @test_get_num_groups(
+// CHECK: tail call i32 @llvm.r600.read.ngroups.x()
+// CHECK: tail call i32 @llvm.r600.read.ngroups.y()
+// CHECK: tail call i32 @llvm.r600.read.ngroups.z()
+void test_get_num_groups(int d, global int *out)
+{
+  switch (d) {
+  case 0: *out = __builtin_r600_read_ngroups_x(); break;
+  case 1: *out = __builtin_r600_read_ngroups_y(); break;
+  case 2: *out = __builtin_r600_read_ngroups_z(); break;
+  default: *out = 1;
+  }
+}
+
+// CHECK-LABEL: @test_get_global_size(
+// CHECK: tail call i32 @llvm.r600.read.global.size.x()
+// CHECK: tail call i32 @llvm.r600.read.global.size.y()
+// CHECK: tail call i32 @llvm.r600.read.global.size.z()
+void test_get_global_size(int d, global int *out)
+{
+  switch (d) {
+  case 0: *out = __builtin_r600_read_global_size_x(); break;
+  case 1: *out = __builtin_r600_read_global_size_y(); break;
+  case 2: *out = __builtin_r600_read_global_size_z(); break;
+  default: *out = 1;
+  }
+}
+
+// CHECK-LABEL: @test_get_local_size(
+// CHECK: tail call i32 @llvm.r600.read.local.size.x(), !range [[WI_RANGE:![0-9]*]]
+// CHECK: tail call i32 @llvm.r600.read.local.size.y(), !range [[WI_RANGE]]
+// CHECK: tail call i32 @llvm.r600.read.local.size.z(), !range [[WI_RANGE]]
+void test_get_local_size(int d, global int * out)
+{
+  switch (d) {
+  case 0: *out = __builtin_r600_read_local_size_x(); break;
+  case 1: *out = __builtin_r600_read_local_size_y(); break;
+  case 2: *out = __builtin_r600_read_local_size_z(); break;
+  default: *out = 1;
+  }
+}
+
+// CHECK-LABEL: @test_get_group_id(
+// CHECK: tail call i32 @llvm.r600.read.tgid.x()
+// CHECK: tail call i32 @llvm.r600.read.tgid.y()
+// CHECK: tail call i32 @llvm.r600.read.tgid.z()
+void test_get_group_id(int d, global int *out)
+{
+  switch (d) {
+  case 0: *out = __builtin_r600_read_tgid_x(); break;
+  case 1: *out = __builtin_r600_read_tgid_y(); break;
+  case 2: *out = __builtin_r600_read_tgid_z(); break;
+  default: *out = 0;
+  }
+}
+
+// CHECK-LABEL: @test_get_local_id(
+// CHECK: tail call i32 @llvm.r600.read.tidig.x(), !range [[WI_RANGE]]
+// CHECK: tail call i32 @llvm.r600.read.tidig.y(), !range [[WI_RANGE]]
+// CHECK: tail call i32 @llvm.r600.read.tidig.z(), !range [[WI_RANGE]]
+void test_get_local_id(int d, global int *out)
+{
+  switch (d) {
+  case 0: *out = __builtin_r600_read_tidig_x(); break;
+  case 1: *out = __builtin_r600_read_tidig_y(); break;
+  case 2: *out = __builtin_r600_read_tidig_z(); break;
+  default: *out = 0;
+  }
+}
+
+// CHECK-LABEL: @test_get_global_offset(
+// CHECK: tail call i32 @llvm.r600.read.global.offset.x()
+// CHECK: tail call i32 @llvm.r600.read.global.offset.y()
+// CHECK: tail call i32 @llvm.r600.read.global.offset.z()
+void test_get_global_offset(int d, global int *out)
+{
+  switch (d) {
+  case 0: *out = __builtin_r600_read_global_offset_x(); break;
+  case 1: *out = __builtin_r600_read_global_offset_y(); break;
+  case 2: *out = __builtin_r600_read_global_offset_z(); break;
+  default: *out = 0;
+  }
+}
+
+// CHECK-DAG: [[DIM_RANGE]] = !{i32 1, i32 4}
+// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 256}