Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -7029,8 +7029,6 @@
   InGroup<DiagGroup<"atomic-memory-ordering">>;
 def err_atomic_op_has_invalid_synch_scope : Error<
   "synchronization scope argument to atomic operation is invalid">;
-def err_atomic_op_has_non_constant_synch_scope : Error<
-  "non-constant synchronization scope argument to atomic operation is not supported">;
 def err_overflow_builtin_must_be_int : Error<
   "operand argument to overflow builtin must be an integer (%0 invalid)">;
Index: include/clang/Basic/SyncScope.h
===================================================================
--- include/clang/Basic/SyncScope.h
+++ include/clang/Basic/SyncScope.h
@@ -15,6 +15,8 @@
 #ifndef LLVM_CLANG_BASIC_SYNCSCOPE_H
 #define LLVM_CLANG_BASIC_SYNCSCOPE_H
 
+#include "llvm/ADT/SmallVector.h"
+
 namespace clang {
 
 /// \brief Defines the synch scope values used by the atomic builtins and
@@ -34,6 +36,28 @@
   return Scope >= static_cast<unsigned>(SyncScope::OpenCLWorkGroup) &&
          Scope <= static_cast<unsigned>(SyncScope::OpenCLSubGroup);
 }
+
+inline llvm::SmallVector<SyncScope, 4> getAllSyncScopeValues() {
+  llvm::SmallVector<SyncScope, 4> Scopes;
+  Scopes.push_back(SyncScope::OpenCLWorkGroup);
+  Scopes.push_back(SyncScope::OpenCLDevice);
+  Scopes.push_back(SyncScope::OpenCLAllSVMDevices);
+  Scopes.push_back(SyncScope::OpenCLSubGroup);
+  return Scopes;
+}
+
+inline llvm::StringRef getAsString(SyncScope S) {
+  switch (S) {
+  case SyncScope::OpenCLWorkGroup:
+    return "opencl_workgroup";
+  case SyncScope::OpenCLDevice:
+    return "opencl_device";
+  case SyncScope::OpenCLAllSVMDevices:
+    return "opencl_allsvmdevices";
+  case SyncScope::OpenCLSubGroup:
+    return "opencl_subgroup";
+  }
+}
 }
 
 #endif
Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -18,6 +18,7 @@
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
@@ -659,6 +660,51 @@
   return DeclPtr;
 }
 
+static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
+                         Address Ptr, Address Val1, Address Val2,
+                         llvm::Value *IsWeak, llvm::Value *FailureOrder,
+                         uint64_t Size, llvm::AtomicOrdering Order,
+                         llvm::Value *Scope) {
+  if (auto SC = dyn_cast<llvm::ConstantInt>(Scope)) {
+    auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID(
+        static_cast<SyncScope>(SC->getZExtValue()), CGF.CGM.getLLVMContext());
+    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
+                 Order, SCID);
+    return;
+  }
+
+  // Handle non-constant scope.
+  auto &Builder = CGF.Builder;
+  auto Scopes = getAllSyncScopeValues();
+  llvm::DenseMap<unsigned, llvm::BasicBlock *> BB;
+  for (auto S : Scopes)
+    BB[static_cast<unsigned>(S)] =
+        CGF.createBasicBlock(getAsString(S), CGF.CurFn);
+
+  llvm::BasicBlock *ContBB =
+      CGF.createBasicBlock("atomic.scope.continue", CGF.CurFn);
+
+  auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
+  // If unsupported sync scope is encountered at run time, assume default sync
+  // scope.
+  auto Default = SyncScope::OpenCLAllSVMDevices;
+  llvm::SwitchInst *SI =
+      Builder.CreateSwitch(SC, BB[static_cast<unsigned>(Default)]);
+  for (auto S : Scopes) {
+    auto *B = BB[static_cast<unsigned>(S)];
+    if (S != Default)
+      SI->addCase(Builder.getInt32(static_cast<unsigned>(S)), B);
+
+    Builder.SetInsertPoint(B);
+    EmitAtomicOp(
+        CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, Order,
+        CGF.getTargetHooks().getLLVMSyncScopeID(S, CGF.getLLVMContext()));
+    Builder.CreateBr(ContBB);
+  }
+
+  Builder.SetInsertPoint(ContBB);
+}
+
 static void
 AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
                   bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
@@ -1132,12 +1178,6 @@
           E->getOp() == AtomicExpr::AO__atomic_load ||
           E->getOp() == AtomicExpr::AO__atomic_load_n;
 
-  assert(isa<llvm::ConstantInt>(Scope) &&
-      "Non-constant synchronization scope not supported");
-  auto SCID = getTargetHooks().getLLVMSyncScopeID(
-      static_cast<SyncScope>(cast<llvm::ConstantInt>(Scope)->getZExtValue()),
-      getLLVMContext());
-
   if (isa<llvm::ConstantInt>(Order)) {
     auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
     // We should not ever get to a case where the ordering isn't a valid C ABI
@@ -1146,30 +1186,30 @@
     switch ((llvm::AtomicOrderingCABI)ord) {
     case llvm::AtomicOrderingCABI::relaxed:
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::Monotonic, SCID);
+                   llvm::AtomicOrdering::Monotonic, Scope);
       break;
     case llvm::AtomicOrderingCABI::consume:
     case llvm::AtomicOrderingCABI::acquire:
       if (IsStore)
         break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::Acquire, SCID);
+                   llvm::AtomicOrdering::Acquire, Scope);
       break;
     case llvm::AtomicOrderingCABI::release:
       if (IsLoad)
         break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::Release, SCID);
+                   llvm::AtomicOrdering::Release, Scope);
       break;
     case llvm::AtomicOrderingCABI::acq_rel:
       if (IsLoad || IsStore)
         break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::AcquireRelease, SCID);
+                   llvm::AtomicOrdering::AcquireRelease, Scope);
       break;
     case llvm::AtomicOrderingCABI::seq_cst:
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::SequentiallyConsistent, SCID);
+                   llvm::AtomicOrdering::SequentiallyConsistent, Scope);
       break;
     }
     if (RValTy->isVoidType())
@@ -1206,12 +1246,12 @@
 
   // Emit all the different atomics
   Builder.SetInsertPoint(MonotonicBB);
   EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-               llvm::AtomicOrdering::Monotonic, SCID);
+               llvm::AtomicOrdering::Monotonic, Scope);
   Builder.CreateBr(ContBB);
   if (!IsStore) {
     Builder.SetInsertPoint(AcquireBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                 llvm::AtomicOrdering::Acquire, SCID);
+                 llvm::AtomicOrdering::Acquire, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
@@ -1221,7 +1261,7 @@
   if (!IsLoad) {
     Builder.SetInsertPoint(ReleaseBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                 llvm::AtomicOrdering::Release, SCID);
+                 llvm::AtomicOrdering::Release, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
                 ReleaseBB);
@@ -1229,14 +1269,14 @@
   if (!IsLoad && !IsStore) {
     Builder.SetInsertPoint(AcqRelBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                 llvm::AtomicOrdering::AcquireRelease, SCID);
+                 llvm::AtomicOrdering::AcquireRelease, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
                 AcqRelBB);
   }
   Builder.SetInsertPoint(SeqCstBB);
   EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-               llvm::AtomicOrdering::SequentiallyConsistent, SCID);
+               llvm::AtomicOrdering::SequentiallyConsistent, Scope);
   Builder.CreateBr(ContBB);
   SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
               SeqCstBB);
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3130,11 +3130,8 @@
   if (IsOpenCL) {
     Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
     llvm::APSInt Result(32);
-    if (!Scope->isIntegerConstantExpr(Result, Context))
-      Diag(Scope->getLocStart(),
-           diag::err_atomic_op_has_non_constant_synch_scope)
-          << Scope->getSourceRange();
-    else if (!isValidSyncScopeValue(Result.getZExtValue()))
+    if (Scope->isIntegerConstantExpr(Result, Context) &&
+        !isValidSyncScopeValue(Result.getZExtValue()))
       Diag(Scope->getLocStart(), diag::err_atomic_op_has_invalid_synch_scope)
           << Scope->getSourceRange();
   } else {
Index: test/CodeGenOpenCL/atomic-ops-libcall.cl
===================================================================
--- test/CodeGenOpenCL/atomic-ops-libcall.cl
+++ test/CodeGenOpenCL/atomic-ops-libcall.cl
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple spir64 -emit-llvm | FileCheck -check-prefix=SPIR %s
 // RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple armv5e-none-linux-gnueabi -emit-llvm | FileCheck -check-prefix=ARM %s
-void f(atomic_int *i, atomic_uint *ui, int cmp) {
+void f(atomic_int *i, atomic_uint *ui, int cmp, int order, int scope) {
   int x;
   // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8 addrspace(4)* {{%[0-9]+}}, i32 5, i32 1)
   // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8* {{%[0-9]+}}, i32 5, i32 1)
@@ -34,4 +34,7 @@
   // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 4)
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_sub_group);
 #endif
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 %order, i32 %order, i32 %scope)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 %order, i32 %order, i32 %scope)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, order, order, scope);
 }
Index: test/CodeGenOpenCL/atomic-ops.cl
===================================================================
--- test/CodeGenOpenCL/atomic-ops.cl
+++ test/CodeGenOpenCL/atomic-ops.cl
@@ -52,6 +52,81 @@
   return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
 }
 
+void fi5(atomic_int *i, int scope) {
+  // CHECK-LABEL: @fi5
+  // CHECK: switch i32 %{{.*}}, label %opencl_allsvmdevices [
+  // CHECK-NEXT: i32 1, label %opencl_workgroup
+  // CHECK-NEXT: i32 2, label %opencl_device
+  // CHECK-NEXT: i32 4, label %opencl_subgroup
+  // CHECK-NEXT: ]
+  // CHECK: opencl_workgroup:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst
+  // CHECK: br label %atomic.scope.continue
+  // CHECK: opencl_device:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst
+  // CHECK: br label %atomic.scope.continue
+  // CHECK: opencl_allsvmdevices:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst, align 4
+  // CHECK: br label %atomic.scope.continue
+  // CHECK: opencl_subgroup:
+  // CHECK: %5 = load atomic i32, i32 addrspace(4)* %0 syncscope("subgroup") seq_cst, align 4
+  // CHECK: br label %atomic.scope.continue
+  // CHECK: atomic.scope.continue:
+  int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
+}
+
+void fi6(atomic_int *i, int order, int scope) {
+  // CHECK-LABEL: @fi6
+  // CHECK: switch i32 %{{.*}}, label %monotonic [
+  // CHECK-NEXT: i32 1, label %acquire
+  // CHECK-NEXT: i32 2, label %acquire
+  // CHECK-NEXT: i32 5, label %seqcst
+  // CHECK-NEXT: ]
+  // CHECK: monotonic:
+  // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [
+  // CHECK-NEXT: i32 1, label %[[MON_WG:.*]]
+  // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]]
+  // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]]
+  // CHECK-NEXT: ]
+  // CHECK: acquire:
+  // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [
+  // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]]
+  // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]]
+  // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]]
+  // CHECK-NEXT: ]
+  // CHECK: seqcst:
+  // CHECK: switch i32 %2, label %[[SEQ_ALL:.*]] [
+  // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]]
+  // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]]
+  // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
+  // CHECK-NEXT: ]
+  // CHECK: [[MON_WG]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") monotonic
+  // CHECK: [[MON_DEV]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") monotonic
+  // CHECK: [[MON_ALL]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} monotonic
+  // CHECK: [[MON_SUB]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") monotonic
+  // CHECK: [[ACQ_WG]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") acquire
+  // CHECK: [[ACQ_DEV]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") acquire
+  // CHECK: [[ACQ_ALL]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} acquire
+  // CHECK: [[ACQ_SUB]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") acquire
+  // CHECK: [[SEQ_WG]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst
+  // CHECK: [[SEQ_DEV]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst
+  // CHECK: [[SEQ_ALL]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst
+  // CHECK: [[SEQ_SUB]]:
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst
+  int x = __opencl_atomic_load(i, order, scope);
+}
+
 float ff1(global atomic_float *d) {
   // CHECK-LABEL: @ff1
   // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic
Index: test/SemaOpenCL/atomic-ops.cl
===================================================================
--- test/SemaOpenCL/atomic-ops.cl
+++ test/SemaOpenCL/atomic-ops.cl
@@ -151,7 +151,7 @@
   (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_device);
   (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_all_svm_devices);
   (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_sub_group);
-  (void)__opencl_atomic_load(Ap, memory_order_relaxed, scope); // expected-error{{non-constant synchronization scope argument to atomic operation is not supported}}
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, scope);
   (void)__opencl_atomic_load(Ap, memory_order_relaxed, 10); //expected-error{{synchronization scope argument to atomic operation is invalid}}
 }