diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -53,6 +53,7 @@ BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n") BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n") BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n") +BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n") // FIXME: Need to disallow constant address space. BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n") diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11896,6 +11896,7 @@ bool CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -28,6 +28,7 @@ #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" @@ -13616,6 +13617,43 @@ Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType()); return Builder.CreateCall(F, { Src0, Src1, Src2 }); } + + case AMDGPU::BI__builtin_amdgcn_fence: { + llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; + llvm::SyncScope::ID SSID; + Value *Order = EmitScalarExpr(E->getArg(0)); + Value *Scope = EmitScalarExpr(E->getArg(1)); + + if (isa(Order)) { + int ord = 
cast(Order)->getZExtValue(); + + // Map C11/C++11 memory ordering to LLVM memory ordering + switch (static_cast(ord)) { + case llvm::AtomicOrderingCABI::acquire: + AO = llvm::AtomicOrdering::Acquire; + break; + case llvm::AtomicOrderingCABI::release: + AO = llvm::AtomicOrdering::Release; + break; + case llvm::AtomicOrderingCABI::acq_rel: + AO = llvm::AtomicOrdering::AcquireRelease; + break; + case llvm::AtomicOrderingCABI::seq_cst: + AO = llvm::AtomicOrdering::SequentiallyConsistent; + break; + case llvm::AtomicOrderingCABI::consume: // not supported by LLVM fence + case llvm::AtomicOrderingCABI::relaxed: // not supported by LLVM fence + break; + } + + StringRef scp; + llvm::getConstantStringInfo(Scope, scp); + SSID = getLLVMContext().getOrInsertSyncScopeID(scp); + + return Builder.CreateFence(AO, SSID); + } + LLVM_FALLTHROUGH; + } default: return nullptr; } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1920,6 +1920,10 @@ if (CheckPPCBuiltinFunctionCall(BuiltinID, TheCall)) return ExprError(); break; + case llvm::Triple::amdgcn: + if (CheckAMDGCNBuiltinFunctionCall(BuiltinID, TheCall)) + return ExprError(); + break; default: break; } @@ -2921,6 +2925,37 @@ return SemaBuiltinConstantArgRange(TheCall, i, l, u); } +bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_fence: { + ExprResult Arg = TheCall->getArg(0); + auto ArgExpr = Arg.get(); + Expr::EvalResult ArgResult; + + if(!ArgExpr->EvaluateAsInt(ArgResult, Context)) { + return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int) + << ArgExpr->getType(); + } + int ord = ArgResult.Val.getInt().getZExtValue(); + + // Check validity of memory ordering as per C11 / C++11's memory model. 
+ switch (static_cast(ord)) { + case llvm::AtomicOrderingCABI::acquire: + case llvm::AtomicOrderingCABI::release: + case llvm::AtomicOrderingCABI::acq_rel: + case llvm::AtomicOrderingCABI::seq_cst: + break; + default: { + return Diag(ArgExpr->getBeginLoc(), + diag::warn_atomic_op_has_invalid_memory_order) + << ArgExpr->getSourceRange(); + } + } + } break; + } + return false; +} + bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (BuiltinID == SystemZ::BI__builtin_tabort) { diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp @@ -0,0 +1,22 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \ +// RUN: -triple=amdgcn-amd-amdhsa | opt -instnamer -S | FileCheck %s + +void test_memory_fence_success() { +// CHECK-LABEL: test_memory_fence_success + + // CHECK: fence syncscope("workgroup") seq_cst + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup"); + + // CHECK: fence syncscope("agent") acquire + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent"); + + // CHECK: fence seq_cst + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); + + // CHECK: fence syncscope("agent") acq_rel + __builtin_amdgcn_fence(4, "agent"); + + // CHECK: fence syncscope("workgroup") release + __builtin_amdgcn_fence(3, "workgroup"); +} diff --git a/clang/test/Sema/builtin-amdgcn-fence-failure.cpp b/clang/test/Sema/builtin-amdgcn-fence-failure.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Sema/builtin-amdgcn-fence-failure.cpp @@ -0,0 +1,9 @@ +// REQUIRES: amdgpu-registered-target +// RUN: not %clang_cc1 %s -S \ +// RUN: -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s + +void test_amdgcn_fence_failure() { + + // CHECK: error: Unsupported atomic synchronization scope + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "foobar"); +} \ No newline at end of file diff --git 
a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl --- a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl @@ -128,3 +128,11 @@ *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}} *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}} } + +void test_fence() { + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}} + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}} + __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}} + __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}} + __builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}} +}