Index: cfe/trunk/include/clang/AST/Expr.h =================================================================== --- cfe/trunk/include/clang/AST/Expr.h +++ cfe/trunk/include/clang/AST/Expr.h @@ -24,6 +24,7 @@ #include "clang/AST/Type.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/LangOptions.h" +#include "clang/Basic/SyncScope.h" #include "clang/Basic/TypeTraits.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" @@ -5067,8 +5068,8 @@ /// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>, /// and corresponding __opencl_atomic_* for OpenCL 2.0. /// All of these instructions take one primary pointer, at least one memory -/// order, and one synchronization scope. The C++11 and __c11 atomic AtomicExpr -/// always take the default synchronization scope. +/// order. The instructions for which getScopeModel returns a non-null value +/// additionally take one synchronization scope. class AtomicExpr : public Expr { public: enum AtomicOp { @@ -5080,14 +5081,16 @@ }; private: - enum { PTR, ORDER, SCOPE, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR }; - Stmt* SubExprs[END_EXPR]; + /// \brief Location of sub-expressions. + /// The Scope sub-expression is located at NumSubExprs - 1, which is + /// not fixed and therefore is not defined in the enum. 
+ enum { PTR, ORDER, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR }; + Stmt *SubExprs[END_EXPR + 1]; unsigned NumSubExprs; SourceLocation BuiltinLoc, RParenLoc; AtomicOp Op; friend class ASTStmtReader; - public: AtomicExpr(SourceLocation BLoc, ArrayRef args, QualType t, AtomicOp op, SourceLocation RP); @@ -5106,7 +5109,8 @@ return cast(SubExprs[ORDER]); } Expr *getScope() const { - return cast(SubExprs[SCOPE]); + assert(getScopeModel() && "No scope"); + return cast(SubExprs[NumSubExprs - 1]); } Expr *getVal1() const { if (Op == AO__c11_atomic_init || Op == AO__opencl_atomic_init) @@ -5173,6 +5177,24 @@ const_child_range children() const { return const_child_range(SubExprs, SubExprs + NumSubExprs); } + + /// \brief Get atomic scope model for the atomic op code. + /// \return empty atomic scope model if the atomic op code does not have + /// scope operand. + static std::unique_ptr getScopeModel(AtomicOp Op) { + auto Kind = + (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max) + ? AtomicScopeModelKind::OpenCL + : AtomicScopeModelKind::None; + return AtomicScopeModel::create(Kind); + } + + /// \brief Get atomic scope model. + /// \return empty atomic scope model if this atomic expression does not have + /// scope operand. 
+ std::unique_ptr getScopeModel() const { + return getScopeModel(getOp()); + } }; /// TypoExpr - Internal placeholder for expressions where typo correction Index: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td +++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td @@ -7012,8 +7012,6 @@ InGroup>; def err_atomic_op_has_invalid_synch_scope : Error< "synchronization scope argument to atomic operation is invalid">; -def err_atomic_op_has_non_constant_synch_scope : Error< - "non-constant synchronization scope argument to atomic operation is not supported">; def err_overflow_builtin_must_be_int : Error< "operand argument to overflow builtin must be an integer (%0 invalid)">; Index: cfe/trunk/include/clang/Basic/SyncScope.h =================================================================== --- cfe/trunk/include/clang/Basic/SyncScope.h +++ cfe/trunk/include/clang/Basic/SyncScope.h @@ -15,24 +15,139 @@ #ifndef LLVM_CLANG_BASIC_SYNCSCOPE_H #define LLVM_CLANG_BASIC_SYNCSCOPE_H +#include "clang/Basic/LangOptions.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include + namespace clang { -/// \brief Defines the synch scope values used by the atomic builtins and -/// expressions. +/// \brief Defines synch scope values used internally by clang. +/// +/// The enum values start from 0 and are contiguous. They are mainly used for +/// enumerating all supported synch scope values and mapping them to LLVM +/// synch scopes. Their numerical values may be different from the corresponding +/// synch scope enums used in source languages. +/// +/// In atomic builtin and expressions, language-specific synch scope enums are +/// used. Currently only OpenCL memory scope enums are supported and assumed +/// to be used by all languages. However, in the future, other languages may +/// define their own set of synch scope enums. 
The language-specific synch scope +/// values are represented by class AtomicScopeModel and its derived classes. +/// +/// To add a new enum value: +/// Add the enum value to enum class SyncScope. +/// Update enum value Last if necessary. +/// Update getAsString. /// -/// The enum values should match the pre-defined macros -/// __OPENCL_MEMORY_SCOPE_*, which are used to define memory_scope_* -/// enums in opencl-c.h. enum class SyncScope { - OpenCLWorkGroup = 1, - OpenCLDevice = 2, - OpenCLAllSVMDevices = 3, - OpenCLSubGroup = 4, + OpenCLWorkGroup, + OpenCLDevice, + OpenCLAllSVMDevices, + OpenCLSubGroup, + Last = OpenCLSubGroup +}; + +inline llvm::StringRef getAsString(SyncScope S) { + switch (S) { + case SyncScope::OpenCLWorkGroup: + return "opencl_workgroup"; + case SyncScope::OpenCLDevice: + return "opencl_device"; + case SyncScope::OpenCLAllSVMDevices: + return "opencl_allsvmdevices"; + case SyncScope::OpenCLSubGroup: + return "opencl_subgroup"; + } + llvm_unreachable("Invalid synch scope"); +} + +/// \brief Defines the kind of atomic scope models. +enum class AtomicScopeModelKind { None, OpenCL }; + +/// \brief Defines the interface for synch scope model. +class AtomicScopeModel { +public: + virtual ~AtomicScopeModel() {} + /// \brief Maps language specific synch scope values to internal + /// SyncScope enum. + virtual SyncScope map(unsigned S) const = 0; + + /// \brief Check if the compile-time constant synch scope value + /// is valid. + virtual bool isValid(unsigned S) const = 0; + + /// \brief Get all possible synch scope values that might be + /// encountered at runtime for the current language. + virtual ArrayRef getRuntimeValues() const = 0; + + /// \brief If atomic builtin function is called with invalid + /// synch scope value at runtime, it will fall back to a valid + /// synch scope value returned by this function. + virtual unsigned getFallBackValue() const = 0; + + /// \brief Create an atomic scope model by AtomicScopeModelKind. 
+ /// \return an empty std::unique_ptr for AtomicScopeModelKind::None. + static std::unique_ptr create(AtomicScopeModelKind K); +}; + +/// \brief Defines the synch scope model for OpenCL. +class AtomicScopeOpenCLModel : public AtomicScopeModel { +public: + /// The enum values match the pre-defined macros + /// __OPENCL_MEMORY_SCOPE_*, which are used to define memory_scope_* + /// enums in opencl-c.h. + enum ID { + WorkGroup = 1, + Device = 2, + AllSVMDevices = 3, + SubGroup = 4, + Last = SubGroup + }; + + AtomicScopeOpenCLModel() {} + + SyncScope map(unsigned S) const override { + switch (static_cast(S)) { + case WorkGroup: + return SyncScope::OpenCLWorkGroup; + case Device: + return SyncScope::OpenCLDevice; + case AllSVMDevices: + return SyncScope::OpenCLAllSVMDevices; + case SubGroup: + return SyncScope::OpenCLSubGroup; + } + llvm_unreachable("Invalid language synch scope value"); + } + + bool isValid(unsigned S) const override { + return S >= static_cast(WorkGroup) && + S <= static_cast(Last); + } + + ArrayRef getRuntimeValues() const override { + static_assert(Last == SubGroup, "Does not include all synch scopes"); + static const unsigned Scopes[] = { + static_cast(WorkGroup), static_cast(Device), + static_cast(AllSVMDevices), static_cast(SubGroup)}; + return llvm::makeArrayRef(Scopes); + } + + unsigned getFallBackValue() const override { + return static_cast(AllSVMDevices); + } }; -inline bool isValidSyncScopeValue(unsigned Scope) { - return Scope >= static_cast(SyncScope::OpenCLWorkGroup) && - Scope <= static_cast(SyncScope::OpenCLSubGroup); +inline std::unique_ptr +AtomicScopeModel::create(AtomicScopeModelKind K) { + switch (K) { + case AtomicScopeModelKind::None: + return std::unique_ptr{}; + case AtomicScopeModelKind::OpenCL: + return llvm::make_unique(); + } + llvm_unreachable("Invalid atomic scope model kind"); } } Index: cfe/trunk/lib/AST/Expr.cpp =================================================================== --- cfe/trunk/lib/AST/Expr.cpp +++ 
cfe/trunk/lib/AST/Expr.cpp @@ -3939,16 +3939,13 @@ switch (Op) { case AO__c11_atomic_init: case AO__opencl_atomic_init: - return 2; case AO__c11_atomic_load: - case AO__opencl_atomic_load: case AO__atomic_load_n: - return 3; + return 2; + case AO__opencl_atomic_load: case AO__c11_atomic_store: case AO__c11_atomic_exchange: - case AO__opencl_atomic_store: - case AO__opencl_atomic_exchange: case AO__atomic_load: case AO__atomic_store: case AO__atomic_store_n: @@ -3958,13 +3955,6 @@ case AO__c11_atomic_fetch_and: case AO__c11_atomic_fetch_or: case AO__c11_atomic_fetch_xor: - case AO__opencl_atomic_fetch_add: - case AO__opencl_atomic_fetch_sub: - case AO__opencl_atomic_fetch_and: - case AO__opencl_atomic_fetch_or: - case AO__opencl_atomic_fetch_xor: - case AO__opencl_atomic_fetch_min: - case AO__opencl_atomic_fetch_max: case AO__atomic_fetch_add: case AO__atomic_fetch_sub: case AO__atomic_fetch_and: @@ -3977,20 +3967,29 @@ case AO__atomic_or_fetch: case AO__atomic_xor_fetch: case AO__atomic_nand_fetch: - return 4; + return 3; + case AO__opencl_atomic_store: + case AO__opencl_atomic_exchange: + case AO__opencl_atomic_fetch_add: + case AO__opencl_atomic_fetch_sub: + case AO__opencl_atomic_fetch_and: + case AO__opencl_atomic_fetch_or: + case AO__opencl_atomic_fetch_xor: + case AO__opencl_atomic_fetch_min: + case AO__opencl_atomic_fetch_max: case AO__atomic_exchange: - return 5; + return 4; case AO__c11_atomic_compare_exchange_strong: case AO__c11_atomic_compare_exchange_weak: + return 5; + case AO__opencl_atomic_compare_exchange_strong: case AO__opencl_atomic_compare_exchange_weak: - return 6; - case AO__atomic_compare_exchange: case AO__atomic_compare_exchange_n: - return 7; + return 6; } llvm_unreachable("unknown atomic op"); } Index: cfe/trunk/lib/CodeGen/CGAtomic.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGAtomic.cpp +++ cfe/trunk/lib/CodeGen/CGAtomic.cpp @@ -18,6 +18,7 @@ #include "TargetInfo.h" #include 
"clang/AST/ASTContext.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" @@ -659,6 +660,61 @@ return DeclPtr; } +static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, + Address Ptr, Address Val1, Address Val2, + llvm::Value *IsWeak, llvm::Value *FailureOrder, + uint64_t Size, llvm::AtomicOrdering Order, + llvm::Value *Scope) { + auto ScopeModel = Expr->getScopeModel(); + + // LLVM atomic instructions always have synch scope. If clang atomic + // expression has no scope operand, use default LLVM synch scope. + if (!ScopeModel) { + EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, + Order, CGF.CGM.getLLVMContext().getOrInsertSyncScopeID("")); + return; + } + + // Handle constant scope. + if (auto SC = dyn_cast(Scope)) { + auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID( + ScopeModel->map(SC->getZExtValue()), CGF.CGM.getLLVMContext()); + EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, + Order, SCID); + return; + } + + // Handle non-constant scope. + auto &Builder = CGF.Builder; + auto Scopes = ScopeModel->getRuntimeValues(); + llvm::DenseMap BB; + for (auto S : Scopes) + BB[S] = CGF.createBasicBlock(getAsString(ScopeModel->map(S)), CGF.CurFn); + + llvm::BasicBlock *ContBB = + CGF.createBasicBlock("atomic.scope.continue", CGF.CurFn); + + auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false); + // If unsupported synch scope is encountered at run time, assume a fallback + // synch scope value. 
+ auto FallBack = ScopeModel->getFallBackValue(); + llvm::SwitchInst *SI = Builder.CreateSwitch(SC, BB[FallBack]); + for (auto S : Scopes) { + auto *B = BB[S]; + if (S != FallBack) + SI->addCase(Builder.getInt32(S), B); + + Builder.SetInsertPoint(B); + EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, + Order, + CGF.getTargetHooks().getLLVMSyncScopeID(ScopeModel->map(S), + CGF.getLLVMContext())); + Builder.CreateBr(ContBB); + } + + Builder.SetInsertPoint(ContBB); +} + static void AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args, bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy, @@ -711,7 +767,8 @@ } llvm::Value *Order = EmitScalarExpr(E->getOrder()); - llvm::Value *Scope = EmitScalarExpr(E->getScope()); + llvm::Value *Scope = + E->getScopeModel() ? EmitScalarExpr(E->getScope()) : nullptr; switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: @@ -1132,12 +1189,6 @@ E->getOp() == AtomicExpr::AO__atomic_load || E->getOp() == AtomicExpr::AO__atomic_load_n; - assert(isa(Scope) && - "Non-constant synchronization scope not supported"); - auto SCID = getTargetHooks().getLLVMSyncScopeID( - static_cast(cast(Scope)->getZExtValue()), - getLLVMContext()); - if (isa(Order)) { auto ord = cast(Order)->getZExtValue(); // We should not ever get to a case where the ordering isn't a valid C ABI @@ -1146,30 +1197,30 @@ switch ((llvm::AtomicOrderingCABI)ord) { case llvm::AtomicOrderingCABI::relaxed: EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::Monotonic, SCID); + llvm::AtomicOrdering::Monotonic, Scope); break; case llvm::AtomicOrderingCABI::consume: case llvm::AtomicOrderingCABI::acquire: if (IsStore) break; // Avoid crashing on code with undefined behavior EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::Acquire, SCID); + llvm::AtomicOrdering::Acquire, Scope); break; case llvm::AtomicOrderingCABI::release: if (IsLoad) break; // Avoid crashing 
on code with undefined behavior EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::Release, SCID); + llvm::AtomicOrdering::Release, Scope); break; case llvm::AtomicOrderingCABI::acq_rel: if (IsLoad || IsStore) break; // Avoid crashing on code with undefined behavior EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::AcquireRelease, SCID); + llvm::AtomicOrdering::AcquireRelease, Scope); break; case llvm::AtomicOrderingCABI::seq_cst: EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::SequentiallyConsistent, SCID); + llvm::AtomicOrdering::SequentiallyConsistent, Scope); break; } if (RValTy->isVoidType()) @@ -1206,12 +1257,12 @@ // Emit all the different atomics Builder.SetInsertPoint(MonotonicBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::Monotonic, SCID); + llvm::AtomicOrdering::Monotonic, Scope); Builder.CreateBr(ContBB); if (!IsStore) { Builder.SetInsertPoint(AcquireBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::Acquire, SCID); + llvm::AtomicOrdering::Acquire, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume), AcquireBB); @@ -1221,7 +1272,7 @@ if (!IsLoad) { Builder.SetInsertPoint(ReleaseBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::Release, SCID); + llvm::AtomicOrdering::Release, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release), ReleaseBB); @@ -1229,14 +1280,14 @@ if (!IsLoad && !IsStore) { Builder.SetInsertPoint(AcqRelBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::AcquireRelease, SCID); + llvm::AtomicOrdering::AcquireRelease, Scope); Builder.CreateBr(ContBB); 
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel), AcqRelBB); } Builder.SetInsertPoint(SeqCstBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, - llvm::AtomicOrdering::SequentiallyConsistent, SCID); + llvm::AtomicOrdering::SequentiallyConsistent, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst), SeqCstBB); Index: cfe/trunk/lib/Frontend/InitPreprocessor.cpp =================================================================== --- cfe/trunk/lib/Frontend/InitPreprocessor.cpp +++ cfe/trunk/lib/Frontend/InitPreprocessor.cpp @@ -579,10 +579,12 @@ // Define macros for the OpenCL memory scope. // The values should match clang SyncScope enum. - assert(static_cast(SyncScope::OpenCLWorkGroup) == 1 && - static_cast(SyncScope::OpenCLDevice) == 2 && - static_cast(SyncScope::OpenCLAllSVMDevices) == 3 && - static_cast(SyncScope::OpenCLSubGroup) == 4); + static_assert( + static_cast(AtomicScopeOpenCLModel::WorkGroup) == 1 && + static_cast(AtomicScopeOpenCLModel::Device) == 2 && + static_cast(AtomicScopeOpenCLModel::AllSVMDevices) == 3 && + static_cast(AtomicScopeOpenCLModel::SubGroup) == 4, + "Invalid OpenCL memory scope enum definition"); Builder.defineMacro("__OPENCL_MEMORY_SCOPE_WORK_ITEM", "0"); Builder.defineMacro("__OPENCL_MEMORY_SCOPE_WORK_GROUP", "1"); Builder.defineMacro("__OPENCL_MEMORY_SCOPE_DEVICE", "2"); Index: cfe/trunk/lib/Sema/SemaChecking.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaChecking.cpp +++ cfe/trunk/lib/Sema/SemaChecking.cpp @@ -3145,27 +3145,6 @@ TheCall->setArg(i, Arg.get()); } - Expr *Scope; - if (Form != Init) { - if (IsOpenCL) { - Scope = TheCall->getArg(TheCall->getNumArgs() - 1); - llvm::APSInt Result(32); - if (!Scope->isIntegerConstantExpr(Result, Context)) - Diag(Scope->getLocStart(), - diag::err_atomic_op_has_non_constant_synch_scope) - << Scope->getSourceRange(); - else if 
(!isValidSyncScopeValue(Result.getZExtValue())) - Diag(Scope->getLocStart(), diag::err_atomic_op_has_invalid_synch_scope) - << Scope->getSourceRange(); - } else { - Scope = IntegerLiteral::Create( - Context, - llvm::APInt(Context.getTypeSize(Context.IntTy), - static_cast(SyncScope::OpenCLAllSVMDevices)), - Context.IntTy, SourceLocation()); - } - } - // Permute the arguments into a 'consistent' order. SmallVector SubExprs; SubExprs.push_back(Ptr); @@ -3176,33 +3155,28 @@ break; case Load: SubExprs.push_back(TheCall->getArg(1)); // Order - SubExprs.push_back(Scope); // Scope break; case LoadCopy: case Copy: case Arithmetic: case Xchg: SubExprs.push_back(TheCall->getArg(2)); // Order - SubExprs.push_back(Scope); // Scope SubExprs.push_back(TheCall->getArg(1)); // Val1 break; case GNUXchg: // Note, AtomicExpr::getVal2() has a special case for this atomic. SubExprs.push_back(TheCall->getArg(3)); // Order - SubExprs.push_back(Scope); // Scope SubExprs.push_back(TheCall->getArg(1)); // Val1 SubExprs.push_back(TheCall->getArg(2)); // Val2 break; case C11CmpXchg: SubExprs.push_back(TheCall->getArg(3)); // Order - SubExprs.push_back(Scope); // Scope SubExprs.push_back(TheCall->getArg(1)); // Val1 SubExprs.push_back(TheCall->getArg(4)); // OrderFail SubExprs.push_back(TheCall->getArg(2)); // Val2 break; case GNUCmpXchg: SubExprs.push_back(TheCall->getArg(4)); // Order - SubExprs.push_back(Scope); // Scope SubExprs.push_back(TheCall->getArg(1)); // Val1 SubExprs.push_back(TheCall->getArg(5)); // OrderFail SubExprs.push_back(TheCall->getArg(2)); // Val2 @@ -3219,6 +3193,17 @@ << SubExprs[1]->getSourceRange(); } + if (auto ScopeModel = AtomicExpr::getScopeModel(Op)) { + auto *Scope = TheCall->getArg(TheCall->getNumArgs() - 1); + llvm::APSInt Result(32); + if (Scope->isIntegerConstantExpr(Result, Context) && + !ScopeModel->isValid(Result.getZExtValue())) { + Diag(Scope->getLocStart(), diag::err_atomic_op_has_invalid_synch_scope) + << Scope->getSourceRange(); + } + 
SubExprs.push_back(Scope); + } + AtomicExpr *AE = new (Context) AtomicExpr(TheCall->getCallee()->getLocStart(), SubExprs, ResultType, Op, TheCall->getRParenLoc()); Index: cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl =================================================================== --- cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl +++ cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl @@ -1,7 +1,7 @@ // RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple spir64 -emit-llvm | FileCheck -check-prefix=SPIR %s // RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple armv5e-none-linux-gnueabi -emit-llvm | FileCheck -check-prefix=ARM %s -void f(atomic_int *i, atomic_uint *ui, int cmp) { +void f(atomic_int *i, atomic_uint *ui, int cmp, int order, int scope) { int x; // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8 addrspace(4)* {{%[0-9]+}}, i32 5, i32 1) // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8* {{%[0-9]+}}, i32 5, i32 1) @@ -34,4 +34,7 @@ // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 4) x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_sub_group); #endif + // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) + // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) + x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, order, order, scope); } Index: cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl =================================================================== --- cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl +++ cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl @@ -52,6 +52,81 @@ return 
__opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group); } +void fi5(atomic_int *i, int scope) { + // CHECK-LABEL: @fi5 + // CHECK: switch i32 %{{.*}}, label %opencl_allsvmdevices [ + // CHECK-NEXT: i32 1, label %opencl_workgroup + // CHECK-NEXT: i32 2, label %opencl_device + // CHECK-NEXT: i32 4, label %opencl_subgroup + // CHECK-NEXT: ] + // CHECK: opencl_workgroup: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst + // CHECK: br label %atomic.scope.continue + // CHECK: opencl_device: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst + // CHECK: br label %atomic.scope.continue + // CHECK: opencl_allsvmdevices: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst, align 4 + // CHECK: br label %atomic.scope.continue + // CHECK: opencl_subgroup: + // CHECK: %5 = load atomic i32, i32 addrspace(4)* %0 syncscope("subgroup") seq_cst, align 4 + // CHECK: br label %atomic.scope.continue + // CHECK: atomic.scope.continue: + int x = __opencl_atomic_load(i, memory_order_seq_cst, scope); +} + +void fi6(atomic_int *i, int order, int scope) { + // CHECK-LABEL: @fi6 + // CHECK: switch i32 %{{.*}}, label %monotonic [ + // CHECK-NEXT: i32 1, label %acquire + // CHECK-NEXT: i32 2, label %acquire + // CHECK-NEXT: i32 5, label %seqcst + // CHECK-NEXT: ] + // CHECK: monotonic: + // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [ + // CHECK-NEXT: i32 1, label %[[MON_WG:.*]] + // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]] + // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]] + // CHECK-NEXT: ] + // CHECK: acquire: + // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [ + // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]] + // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]] + // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]] + // CHECK-NEXT: ] + // CHECK: seqcst: + // CHECK: switch i32 %2, label %[[SEQ_ALL:.*]] [ + // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]] + // CHECK-NEXT: 
i32 2, label %[[SEQ_DEV:.*]] + // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]] + // CHECK-NEXT: ] + // CHECK: [[MON_WG]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") monotonic + // CHECK: [[MON_DEV]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") monotonic + // CHECK: [[MON_ALL]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} monotonic + // CHECK: [[MON_SUB]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") monotonic + // CHECK: [[ACQ_WG]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") acquire + // CHECK: [[ACQ_DEV]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") acquire + // CHECK: [[ACQ_ALL]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} acquire + // CHECK: [[ACQ_SUB]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") acquire + // CHECK: [[SEQ_WG]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("workgroup") seq_cst + // CHECK: [[SEQ_DEV]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("agent") seq_cst + // CHECK: [[SEQ_ALL]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} seq_cst + // CHECK: [[SEQ_SUB]]: + // CHECK: load atomic i32, i32 addrspace(4)* %{{.*}} syncscope("subgroup") seq_cst + int x = __opencl_atomic_load(i, order, scope); +} + float ff1(global atomic_float *d) { // CHECK-LABEL: @ff1 // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic Index: cfe/trunk/test/SemaOpenCL/atomic-ops.cl =================================================================== --- cfe/trunk/test/SemaOpenCL/atomic-ops.cl +++ cfe/trunk/test/SemaOpenCL/atomic-ops.cl @@ -14,7 +14,6 @@ int8 i64; atomic_int gn; - void f(atomic_int *i, const atomic_int *ci, atomic_intptr_t *p, atomic_float *d, int *I, const int *CI, @@ -81,6 +80,13 @@ } void memory_checks(atomic_int *Ap, int *p, int val) { + // non-integer memory order 
argument is implicitly converted to an integer type. + (void)__opencl_atomic_load(Ap, 1.0f, memory_scope_work_group); + float forder; + (void)__opencl_atomic_load(Ap, forder, memory_scope_work_group); + struct S s; + (void)__opencl_atomic_load(Ap, s, memory_scope_work_group); // expected-error {{passing 'struct S' to parameter of incompatible type 'int'}} + (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_group); (void)__opencl_atomic_load(Ap, memory_order_acquire, memory_scope_work_group); (void)__opencl_atomic_load(Ap, memory_order_consume, memory_scope_work_group); // expected-error {{use of undeclared identifier 'memory_order_consume'}} @@ -151,8 +157,15 @@ (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_device); (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_all_svm_devices); (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_sub_group); - (void)__opencl_atomic_load(Ap, memory_order_relaxed, scope); // expected-error{{non-constant synchronization scope argument to atomic operation is not supported}} + (void)__opencl_atomic_load(Ap, memory_order_relaxed, scope); (void)__opencl_atomic_load(Ap, memory_order_relaxed, 10); //expected-error{{synchronization scope argument to atomic operation is invalid}} + + // non-integer memory scope is implicitly converted to an integer type. + float fscope; + (void)__opencl_atomic_load(Ap, memory_order_relaxed, 1.0f); + (void)__opencl_atomic_load(Ap, memory_order_relaxed, fscope); + struct S s; + (void)__opencl_atomic_load(Ap, memory_order_relaxed, s); //expected-error{{passing 'struct S' to parameter of incompatible type 'int'}} } void nullPointerWarning(atomic_int *Ap, int *p, int val) {