diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -2455,6 +2455,63 @@ and ``__OPENCL_MEMORY_SCOPE_SUB_GROUP`` are provided, with values corresponding to the enumerators of OpenCL's ``memory_scope`` enumeration.) +AMDGCN specific builtins +------------------------- + +``__builtin_amdgcn_fence`` +------------------------- + +``__builtin_amdgcn_fence`` allows using `Fence instruction <https://llvm.org/docs/LangRef.html#fence-instruction>`_ +from clang. It takes C++11 compatible memory-ordering and AMDGCN-specific +sync-scope as arguments, and generates a fence instruction in the IR. + +**Syntax**: + +.. code-block:: c++ + + __builtin_amdgcn_fence(unsigned int memory_ordering, String sync_scope) + +**Example of use**: + +.. code-block:: c++ + + void my_fence(int i) { + i++; + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup"); + i--; + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent"); + } + +**Description**: + +The first argument of ``__builtin_amdgcn_fence()`` builtin is one of the +memory-ordering specifiers ``__ATOMIC_ACQUIRE``, ``__ATOMIC_RELEASE``, +``__ATOMIC_ACQ_REL``, or ``__ATOMIC_SEQ_CST`` following C++11 memory model +semantics. Equivalent enum values of these memory orderings can also be +specified. The builtin maps these C++ memory orderings to the corresponding +LLVM Atomic Memory Ordering for the fence instruction using LLVM Atomic C +ABI, as given in the table below. The second argument is an AMDGCN-specific +synchronization scope defined as a String. It can take any of the sync scopes +defined for `AMDHSA LLVM Sync Scopes <https://llvm.org/docs/AMDGPUUsage.html#memory-scopes>`_. +This builtin transparently passes the second argument to the fence instruction +and relies on the AMDGCN implementation for the validity check. 
+ ++------------------------------+--------------------------------+ +| Input in clang | Output in IR | +| (C++11 Memory-ordering) | (LLVM Atomic Memory-ordering) | ++======================+=======+========================+=======+ +| Enum | Value | Enum | Value | ++----------------------+-------+------------------------+-------+ +| ``__ATOMIC_ACQUIRE`` | 2 | Acquire | 4 | ++----------------------+-------+------------------------+-------+ +| ``__ATOMIC_RELEASE`` | 3 | Release | 5 | ++----------------------+-------+------------------------+-------+ +| ``__ATOMIC_ACQ_REL`` | 4 | AcquireRelease | 6 | ++----------------------+-------+------------------------+-------+ +| ``__ATOMIC_SEQ_CST`` | 5 | SequentiallyConsistent | 7 | ++----------------------+-------+------------------------+-------+ + + Low-level ARM exclusive memory builtins --------------------------------------- diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -785,11 +785,6 @@ BUILTIN(__sync_fetch_and_umin, "UiUiD*Ui", "n") BUILTIN(__sync_fetch_and_umax, "UiUiD*Ui", "n") -// clang builtin to expose llvm fence instruction -// First argument : uint in range [2, 5] i.e. [acquire, seq_cst] -// Second argument : target specific sync scope string -BUILTIN(__builtin_memory_fence, "vUicC*", "n") - // Random libc builtins. BUILTIN(__builtin_abort, "v", "Fnr") BUILTIN(__builtin_index, "c*cC*i", "Fn") diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -53,6 +53,7 @@ BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n") BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n") BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n") +BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n") // FIXME: Need to disallow constant address space. 
BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n") diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11896,6 +11896,7 @@ bool CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -13618,7 +13618,7 @@ return Builder.CreateCall(F, { Src0, Src1, Src2 }); } - case Builtin::BI__builtin_memory_fence: { + case AMDGPU::BI__builtin_amdgcn_fence: { llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; Value *Order = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1870,34 +1870,6 @@ : "__builtin_frame_address") << TheCall->getSourceRange(); } break; - - case Builtin::BI__builtin_memory_fence: { - ExprResult Arg = TheCall->getArg(0); - auto ArgExpr = Arg.get(); - Expr::EvalResult ArgResult; - - if(!ArgExpr->EvaluateAsInt(ArgResult, Context)) { - Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int) - << ArgExpr->getType(); - return ExprError(); - } - int ord = ArgResult.Val.getInt().getZExtValue(); - - // Check valididty of memory ordering as per C11 / C++11's memody model. 
- switch (static_cast<llvm::AtomicOrderingCABI>(ord)) { - case llvm::AtomicOrderingCABI::acquire: - case llvm::AtomicOrderingCABI::release: - case llvm::AtomicOrderingCABI::acq_rel: - case llvm::AtomicOrderingCABI::seq_cst: - break; - default: { - Diag(ArgExpr->getBeginLoc(), - diag::warn_atomic_op_has_invalid_memory_order) - << ArgExpr->getSourceRange(); - return ExprError(); - } - } - } break; } // Since the target specific builtins for each arch overlap, only check those @@ -1948,6 +1920,10 @@ if (CheckPPCBuiltinFunctionCall(BuiltinID, TheCall)) return ExprError(); break; + case llvm::Triple::amdgcn: + if (CheckAMDGCNBuiltinFunctionCall(BuiltinID, TheCall)) + return ExprError(); + break; default: break; } @@ -2949,6 +2925,37 @@ return SemaBuiltinConstantArgRange(TheCall, i, l, u); } +bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_fence: { + ExprResult Arg = TheCall->getArg(0); + auto ArgExpr = Arg.get(); + Expr::EvalResult ArgResult; + + if(!ArgExpr->EvaluateAsInt(ArgResult, Context)) { + return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int) + << ArgExpr->getType(); + } + int ord = ArgResult.Val.getInt().getZExtValue(); + + // Check validity of memory ordering as per C11 / C++11's memory model. 
+ switch (static_cast<llvm::AtomicOrderingCABI>(ord)) { + case llvm::AtomicOrderingCABI::acquire: + case llvm::AtomicOrderingCABI::release: + case llvm::AtomicOrderingCABI::acq_rel: + case llvm::AtomicOrderingCABI::seq_cst: + break; + default: { + return Diag(ArgExpr->getBeginLoc(), + diag::warn_atomic_op_has_invalid_memory_order) + << ArgExpr->getSourceRange(); + } + } + } break; + } + return false; +} + bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (BuiltinID == SystemZ::BI__builtin_tabort) { diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp @@ -0,0 +1,9 @@ +// REQUIRES: amdgpu-registered-target +// RUN: not %clang_cc1 %s -S \ +// RUN: -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s + +void test_amdgcn_fence_failure() { + + // CHECK: error: Unsupported atomic synchronization scope + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "foobar"); +} \ No newline at end of file diff --git a/clang/test/CodeGenHIP/builtin_memory_fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp rename from clang/test/CodeGenHIP/builtin_memory_fence.cpp rename to clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp --- a/clang/test/CodeGenHIP/builtin_memory_fence.cpp +++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp @@ -1,25 +1,22 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 %s -x hip -emit-llvm -O0 -o - \ +// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \ // RUN: -triple=amdgcn-amd-amdhsa | opt -instnamer -S | FileCheck %s void test_memory_fence_success() { // CHECK-LABEL: test_memory_fence_success // CHECK: fence syncscope("workgroup") seq_cst - __builtin_memory_fence(__ATOMIC_SEQ_CST, "workgroup"); + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup"); // CHECK: fence syncscope("agent") acquire - __builtin_memory_fence(__ATOMIC_ACQUIRE, "agent"); + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, 
"agent"); // CHECK: fence seq_cst - __builtin_memory_fence(__ATOMIC_SEQ_CST, ""); + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); // CHECK: fence syncscope("agent") acq_rel - __builtin_memory_fence(4, "agent"); + __builtin_amdgcn_fence(4, "agent"); // CHECK: fence syncscope("workgroup") release - __builtin_memory_fence(3, "workgroup"); - - // CHECK: fence syncscope("foobar") release - __builtin_memory_fence(3, "foobar"); -} \ No newline at end of file + __builtin_amdgcn_fence(3, "workgroup"); +} diff --git a/clang/test/Sema/builtins.c b/clang/test/Sema/builtins.c --- a/clang/test/Sema/builtins.c +++ b/clang/test/Sema/builtins.c @@ -320,15 +320,3 @@ // expected-error@+1 {{use of unknown builtin '__builtin_is_constant_evaluated'}} return __builtin_is_constant_evaluated(); } - -void test_memory_fence_errors() { - __builtin_memory_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}} - - __builtin_memory_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}} - - __builtin_memory_fence(4); // expected-error {{too few arguments to function call, expected 2}} - - __builtin_memory_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}} - - __builtin_memory_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}} -} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl --- a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl @@ -128,3 +128,11 @@ *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}} *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}} } + +void test_fence() { 
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}} + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}} + __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}} + __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}} + __builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}} +}