diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -67,11 +67,6 @@ BUILTIN(__builtin_amdgcn_iglp_opt, "vIi", "n") BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n") BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n") -BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n") -BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n") -BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n") -BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n") -BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n") BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n") BUILTIN(__builtin_amdgcn_groupstaticsize, "Ui", "n") @@ -172,6 +167,15 @@ BUILTIN(__builtin_amdgcn_is_shared, "bvC*0", "nc") BUILTIN(__builtin_amdgcn_is_private, "bvC*0", "nc") +//===----------------------------------------------------------------------===// +// GWS builtins. +//===----------------------------------------------------------------------===// +TARGET_BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n", "gws") +TARGET_BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n", "gws") +TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n", "gws") +TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n", "gws") +TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n", "gws") + //===----------------------------------------------------------------------===// // CI+ only builtins. //===----------------------------------------------------------------------===// diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -244,7 +244,8 @@ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP); - ReadOnlyFeatures.insert("image-insts"); + for (auto F : {"image-insts", "gws"}) + ReadOnlyFeatures.insert(F); HalfArgsAndReturns = true; } diff --git a/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl b/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -triple amdgcn -target-feature +gws -o /dev/null %s 2>&1 \ +// RUN: | FileCheck --check-prefix=GWS %s + +// GWS: warning: feature flag '+gws' is ignored since the feature is read only [-Winvalid-command-line-argument] + +kernel void test() {} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl @@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx803 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90c -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -emit-llvm -o - %s | FileCheck %s + +typedef unsigned int uint; + +// CHECK-LABEL: define dso_local amdgpu_kernel void @test_builtins_amdgcn_gws_insts +// CHECK-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.amdgcn.ds.gws.init(i32 [[A]], i32 [[B]]) +// CHECK-NEXT: tail call void @llvm.amdgcn.ds.gws.barrier(i32 [[A]], i32 [[B]]) +// CHECK-NEXT: tail call void @llvm.amdgcn.ds.gws.sema.v(i32 [[A]]) +// CHECK-NEXT: tail call void @llvm.amdgcn.ds.gws.sema.br(i32 [[A]], i32 [[B]]) +// CHECK-NEXT: tail call void @llvm.amdgcn.ds.gws.sema.p(i32 [[A]]) +// CHECK-NEXT: ret void +// +kernel void test_builtins_amdgcn_gws_insts(uint a, uint b) { + __builtin_amdgcn_ds_gws_init(a, b); + __builtin_amdgcn_ds_gws_barrier(a, b); + __builtin_amdgcn_ds_gws_sema_v(a); + __builtin_amdgcn_ds_gws_sema_br(a, b); + __builtin_amdgcn_ds_gws_sema_p(a); +} diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90 --- a/flang/test/Lower/OpenMP/target_cpu_features.f90 +++ b/flang/test/Lower/OpenMP/target_cpu_features.f90 @@ -8,7 +8,7 @@ !CHECK: omp.target = #omp.target !CHECK-LABEL: func.func @_QPomp_target_simple() subroutine omp_target_simple diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -286,6 +286,7 @@ Features["gfx11-insts"] = true; Features["atomic-fadd-rtn-insts"] = true; Features["image-insts"] = true; + Features["gws"] = true; break; case GK_GFX1036: case GK_GFX1035: @@ -311,6 +312,7 @@ Features["image-insts"] = true; Features["s-memrealtime"] = true; Features["s-memtime-inst"] = true; + Features["gws"] = true; break; case GK_GFX1012: case GK_GFX1011: @@ -333,6 +335,7 @@ Features["image-insts"] = true; Features["s-memrealtime"] = true; Features["s-memtime-inst"] = true; + Features["gws"] = true; break; case GK_GFX942: case GK_GFX941: @@ -362,6 +365,7 @@ Features["s-memrealtime"] = true; Features["ci-insts"] = true; Features["s-memtime-inst"] = true; + Features["gws"] = true; break; case GK_GFX90A: Features["gfx90a-insts"] = true; @@ -412,6 +416,7 @@ case GK_GFX600: Features["image-insts"] = true; Features["s-memtime-inst"] = true; + Features["gws"] = true; break; case GK_NONE: break;