Index: docs/LanguageExtensions.rst
===================================================================
--- docs/LanguageExtensions.rst
+++ docs/LanguageExtensions.rst
@@ -1929,7 +1929,13 @@
 ``memory_order`` enumeration.
 
 (Note that Clang additionally provides GCC-compatible ``__atomic_*``
-builtins)
+builtins and OpenCL 2.0 ``__opencl_atomic_*`` builtins. The OpenCL 2.0
+atomic builtins are an explicit form of the corresponding OpenCL 2.0
+builtin functions, named with a ``__opencl_`` prefix. The macros
+``__OPENCL_MEMORY_SCOPE_WORK_ITEM``, ``__OPENCL_MEMORY_SCOPE_WORK_GROUP``,
+``__OPENCL_MEMORY_SCOPE_DEVICE``, ``__OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES``,
+and ``__OPENCL_MEMORY_SCOPE_SUB_GROUP`` are provided, with values
+corresponding to the enumerators of OpenCL's ``memory_scope`` enumeration.)
 
 Low-level ARM exclusive memory builtins
 ---------------------------------------
Index: include/clang/AST/Expr.h
===================================================================
--- include/clang/AST/Expr.h
+++ include/clang/AST/Expr.h
@@ -5064,9 +5064,11 @@
 /// AtomicExpr - Variadic atomic builtins: __atomic_exchange, __atomic_fetch_*,
 /// __atomic_load, __atomic_store, and __atomic_compare_exchange_*, for the
-/// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>.
-/// All of these instructions take one primary pointer and at least one memory
-/// order.
+/// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>,
+/// and corresponding __opencl_atomic_* for OpenCL 2.0.
+/// All of these instructions take one primary pointer, at least one memory
+/// order, and one synchronization scope. The C++11 and __c11 atomic builtins
+/// always use the default synchronization scope.
 class AtomicExpr : public Expr {
 public:
   enum AtomicOp {
@@ -5078,7 +5080,7 @@
   };
 
 private:
-  enum { PTR, ORDER, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
+  enum { PTR, ORDER, SCOPE, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
   Stmt* SubExprs[END_EXPR];
   unsigned NumSubExprs;
   SourceLocation BuiltinLoc, RParenLoc;
@@ -5103,8 +5105,11 @@
   Expr *getOrder() const {
     return cast<Expr>(SubExprs[ORDER]);
   }
+  Expr *getScope() const {
+    return cast<Expr>(SubExprs[SCOPE]);
+  }
   Expr *getVal1() const {
-    if (Op == AO__c11_atomic_init)
+    if (Op == AO__c11_atomic_init || Op == AO__opencl_atomic_init)
       return cast<Expr>(SubExprs[ORDER]);
     assert(NumSubExprs > VAL1);
     return cast<Expr>(SubExprs[VAL1]);
   }
@@ -5123,6 +5128,7 @@
     assert(NumSubExprs > WEAK);
     return cast<Expr>(SubExprs[WEAK]);
   }
+  QualType getValueType() const;
 
   AtomicOp getOp() const { return Op; }
   unsigned getNumSubExprs() const { return NumSubExprs; }
@@ -5139,10 +5145,17 @@
   bool isCmpXChg() const {
     return getOp() == AO__c11_atomic_compare_exchange_strong ||
            getOp() == AO__c11_atomic_compare_exchange_weak ||
+           getOp() == AO__opencl_atomic_compare_exchange_strong ||
+           getOp() == AO__opencl_atomic_compare_exchange_weak ||
            getOp() == AO__atomic_compare_exchange ||
            getOp() == AO__atomic_compare_exchange_n;
   }
 
+  bool isOpenCL() const {
+    return getOp() >= AO__opencl_atomic_init &&
+           getOp() <= AO__opencl_atomic_fetch_max;
+  }
+
   SourceLocation getBuiltinLoc() const { return BuiltinLoc; }
   SourceLocation getRParenLoc() const { return RParenLoc; }
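To make the documented interface concrete, here is a minimal OpenCL C usage sketch, assuming -cl-std=CL2.0 with the default header as in the tests added further down; the function name is illustrative, not part of the patch:

  // Atomically increment a counter with device-wide visibility.
  void increment(atomic_int *counter) {
    __opencl_atomic_fetch_add(counter, 1, memory_order_relaxed,
                              memory_scope_device);
  }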
Index: include/clang/Basic/Builtins.def
===================================================================
--- include/clang/Basic/Builtins.def
+++ include/clang/Basic/Builtins.def
@@ -700,6 +700,21 @@
 BUILTIN(__atomic_always_lock_free, "izvCD*", "n")
 BUILTIN(__atomic_is_lock_free, "izvCD*", "n")
 
+// OpenCL 2.0 atomic builtins.
+ATOMIC_BUILTIN(__opencl_atomic_init, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_load, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_store, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_exchange, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_compare_exchange_strong, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_compare_exchange_weak, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_add, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_sub, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_and, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_or, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_xor, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_min, "v.", "t")
+ATOMIC_BUILTIN(__opencl_atomic_fetch_max, "v.", "t")
+
 #undef ATOMIC_BUILTIN
 
 // Non-overloaded atomic builtins.
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -6995,8 +6995,8 @@
   "address argument to atomic operation must be a pointer to _Atomic "
   "type (%0 invalid)">;
 def err_atomic_op_needs_non_const_atomic : Error<
-  "address argument to atomic operation must be a pointer to non-const _Atomic "
-  "type (%0 invalid)">;
+  "address argument to atomic operation must be a pointer to non-%select{const|constant}0 _Atomic "
+  "type (%1 invalid)">;
 def err_atomic_op_needs_non_const_pointer : Error<
   "address argument to atomic operation must be a pointer to non-const "
   "type (%0 invalid)">;
@@ -7012,6 +7012,10 @@
 def warn_atomic_op_has_invalid_memory_order : Warning<
   "memory order argument to atomic operation is invalid">,
   InGroup<DiagGroup<"atomic-memory-ordering">>;
+def err_atomic_op_has_invalid_synch_scope : Error<
+  "synchronization scope argument to atomic operation is invalid">;
+def err_atomic_op_has_non_constant_synch_scope : Error<
+  "non-constant synchronization scope argument to atomic operation is not supported">;
 
 def err_overflow_builtin_must_be_int : Error<
   "operand argument to overflow builtin must be an integer (%0 invalid)">;
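For reference, a sketch of kernel code that would trigger the two new scope diagnostics, under the same CL2.0 setup as the SemaOpenCL test added below; the function name is hypothetical:

  void bad_scopes(atomic_int *p, int runtime_scope) {
    // error: non-constant synchronization scope argument to atomic operation
    // is not supported
    __opencl_atomic_load(p, memory_order_seq_cst, runtime_scope);
    // error: synchronization scope argument to atomic operation is invalid
    __opencl_atomic_load(p, memory_order_seq_cst, 42);
  }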
Index: include/clang/Basic/SyncScope.h
===================================================================
--- /dev/null
+++ include/clang/Basic/SyncScope.h
@@ -0,0 +1,39 @@
+//===--- SyncScope.h - Atomic synchronization scopes ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides definitions for the atomic synchronization scopes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_SYNCSCOPE_H
+#define LLVM_CLANG_BASIC_SYNCSCOPE_H
+
+namespace clang {
+
+/// \brief Defines the sync scope values used by the atomic builtins and
+/// expressions.
+///
+/// The enum values should match the pre-defined macros
+/// __OPENCL_MEMORY_SCOPE_*, which are used to define memory_scope_*
+/// enums in opencl-c.h.
+enum class SyncScope {
+  OpenCLWorkGroup = 1,
+  OpenCLDevice = 2,
+  OpenCLAllSVMDevices = 3,
+  OpenCLSubGroup = 4,
+};
+
+inline bool isValidSyncScopeValue(unsigned Scope) {
+  return Scope >= static_cast<unsigned>(SyncScope::OpenCLWorkGroup) &&
+         Scope <= static_cast<unsigned>(SyncScope::OpenCLSubGroup);
+}
+}
+
+#endif
Index: lib/AST/ASTContext.cpp
===================================================================
--- lib/AST/ASTContext.cpp
+++ lib/AST/ASTContext.cpp
@@ -1182,7 +1182,14 @@
   ObjCSuperType = QualType();
 
   // void * type
-  VoidPtrTy = getPointerType(VoidTy);
+  if (LangOpts.OpenCLVersion >= 200) {
+    auto Q = VoidTy.getQualifiers();
+    Q.setAddressSpace(LangAS::opencl_generic);
+    VoidPtrTy = getPointerType(getCanonicalType(
+        getQualifiedType(VoidTy.getUnqualifiedType(), Q)));
+  } else {
+    VoidPtrTy = getPointerType(VoidTy);
+  }
 
   // nullptr type (C++0x 2.14.7)
   InitBuiltinType(NullPtrTy, BuiltinType::NullPtr);
Index: lib/AST/Expr.cpp
===================================================================
--- lib/AST/Expr.cpp
+++ lib/AST/Expr.cpp
@@ -3938,12 +3938,17 @@
 unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   switch (Op) {
   case AO__c11_atomic_init:
+  case AO__opencl_atomic_init:
+    return 2;
   case AO__c11_atomic_load:
+  case AO__opencl_atomic_load:
   case AO__atomic_load_n:
-    return 2;
+    return 3;
 
   case AO__c11_atomic_store:
   case AO__c11_atomic_exchange:
+  case AO__opencl_atomic_store:
+  case AO__opencl_atomic_exchange:
   case AO__atomic_load:
   case AO__atomic_store:
   case AO__atomic_store_n:
@@ -3953,6 +3958,13 @@
   case AO__c11_atomic_fetch_and:
   case AO__c11_atomic_fetch_or:
   case AO__c11_atomic_fetch_xor:
+  case AO__opencl_atomic_fetch_add:
+  case AO__opencl_atomic_fetch_sub:
+  case AO__opencl_atomic_fetch_and:
+  case AO__opencl_atomic_fetch_or:
+  case AO__opencl_atomic_fetch_xor:
+  case AO__opencl_atomic_fetch_min:
+  case AO__opencl_atomic_fetch_max:
   case AO__atomic_fetch_add:
   case AO__atomic_fetch_sub:
   case AO__atomic_fetch_and:
@@ -3965,22 +3977,33 @@
   case AO__atomic_or_fetch:
   case AO__atomic_xor_fetch:
   case AO__atomic_nand_fetch:
-    return 3;
+    return 4;
 
   case AO__atomic_exchange:
-    return 4;
+    return 5;
 
   case AO__c11_atomic_compare_exchange_strong:
   case AO__c11_atomic_compare_exchange_weak:
-    return 5;
+  case AO__opencl_atomic_compare_exchange_strong:
+  case AO__opencl_atomic_compare_exchange_weak:
+    return 6;
 
   case AO__atomic_compare_exchange:
   case AO__atomic_compare_exchange_n:
-    return 6;
+    return 7;
   }
   llvm_unreachable("unknown atomic op");
 }
 
+QualType AtomicExpr::getValueType() const {
+  auto T = getPtr()->getType()->castAs<PointerType>()->getPointeeType();
+  if (auto AT = T->getAs<AtomicType>())
+    return AT->getValueType();
+  return T;
+}
+
 QualType OMPArraySectionExpr::getBaseOriginalType(const Expr *Base) {
   unsigned ArraySectionCount = 0;
   while (auto *OASE = dyn_cast<OMPArraySectionExpr>(Base->IgnoreParens())) {
Index: lib/AST/StmtPrinter.cpp
===================================================================
--- lib/AST/StmtPrinter.cpp
+++ lib/AST/StmtPrinter.cpp
@@ -1891,7 +1891,8 @@
   // AtomicExpr stores its subexpressions in a permuted order.
PrintExpr(Node->getPtr()); if (Node->getOp() != AtomicExpr::AO__c11_atomic_load && - Node->getOp() != AtomicExpr::AO__atomic_load_n) { + Node->getOp() != AtomicExpr::AO__atomic_load_n && + Node->getOp() != AtomicExpr::AO__opencl_atomic_load) { OS << ", "; PrintExpr(Node->getVal1()); } @@ -1905,7 +1906,8 @@ OS << ", "; PrintExpr(Node->getWeak()); } - if (Node->getOp() != AtomicExpr::AO__c11_atomic_init) { + if (Node->getOp() != AtomicExpr::AO__c11_atomic_init && + Node->getOp() != AtomicExpr::AO__opencl_atomic_init) { OS << ", "; PrintExpr(Node->getOrder()); } Index: lib/Basic/Targets/AMDGPU.cpp =================================================================== --- lib/Basic/Targets/AMDGPU.cpp +++ lib/Basic/Targets/AMDGPU.cpp @@ -328,6 +328,8 @@ PtrDiffType = SignedLong; IntPtrType = SignedLong; } + + MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; } void AMDGPUTargetInfo::adjust(LangOptions &Opts) { Index: lib/CodeGen/CGAtomic.cpp =================================================================== --- lib/CodeGen/CGAtomic.cpp +++ lib/CodeGen/CGAtomic.cpp @@ -15,6 +15,7 @@ #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/IR/DataLayout.h" @@ -359,13 +360,15 @@ Address Val1, Address Val2, uint64_t Size, llvm::AtomicOrdering SuccessOrder, - llvm::AtomicOrdering FailureOrder) { + llvm::AtomicOrdering FailureOrder, + llvm::SyncScope::ID Scope) { // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment. llvm::Value *Expected = CGF.Builder.CreateLoad(Val1); llvm::Value *Desired = CGF.Builder.CreateLoad(Val2); llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg( - Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder); + Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder, + Scope); Pair->setVolatile(E->isVolatile()); Pair->setWeak(IsWeak); @@ -407,7 +410,8 @@ Address Val1, Address Val2, llvm::Value *FailureOrderVal, uint64_t Size, - llvm::AtomicOrdering SuccessOrder) { + llvm::AtomicOrdering SuccessOrder, + llvm::SyncScope::ID Scope) { llvm::AtomicOrdering FailureOrder; if (llvm::ConstantInt *FO = dyn_cast(FailureOrderVal)) { auto FOS = FO->getSExtValue(); @@ -435,7 +439,7 @@ llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder); } emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder, - FailureOrder); + FailureOrder, Scope); return; } @@ -460,13 +464,13 @@ // doesn't fold to a constant for the ordering. 
CGF.Builder.SetInsertPoint(MonotonicBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::AtomicOrdering::Monotonic); + Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope); CGF.Builder.CreateBr(ContBB); if (AcquireBB) { CGF.Builder.SetInsertPoint(AcquireBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::AtomicOrdering::Acquire); + Size, SuccessOrder, llvm::AtomicOrdering::Acquire, Scope); CGF.Builder.CreateBr(ContBB); SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume), AcquireBB); @@ -476,7 +480,7 @@ if (SeqCstBB) { CGF.Builder.SetInsertPoint(SeqCstBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder, - llvm::AtomicOrdering::SequentiallyConsistent); + llvm::AtomicOrdering::SequentiallyConsistent, Scope); CGF.Builder.CreateBr(ContBB); SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst), SeqCstBB); @@ -488,27 +492,31 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, Address Ptr, Address Val1, Address Val2, llvm::Value *IsWeak, llvm::Value *FailureOrder, - uint64_t Size, llvm::AtomicOrdering Order) { + uint64_t Size, llvm::AtomicOrdering Order, + llvm::SyncScope::ID Scope) { llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add; llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0; switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: llvm_unreachable("Already handled!"); case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2, - FailureOrder, Size, Order); + FailureOrder, Size, Order, Scope); return; case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2, - FailureOrder, Size, Order); + FailureOrder, Size, Order, Scope); return; case AtomicExpr::AO__atomic_compare_exchange: case AtomicExpr::AO__atomic_compare_exchange_n: { if (llvm::ConstantInt *IsWeakC = dyn_cast(IsWeak)) { emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr, - Val1, Val2, FailureOrder, Size, Order); + Val1, Val2, FailureOrder, Size, Order, Scope); } else { // Create all the relevant BB's llvm::BasicBlock *StrongBB = @@ -522,12 +530,12 @@ CGF.Builder.SetInsertPoint(StrongBB); emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2, - FailureOrder, Size, Order); + FailureOrder, Size, Order, Scope); CGF.Builder.CreateBr(ContBB); CGF.Builder.SetInsertPoint(WeakBB); emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2, - FailureOrder, Size, Order); + FailureOrder, Size, Order, Scope); CGF.Builder.CreateBr(ContBB); CGF.Builder.SetInsertPoint(ContBB); @@ -535,26 +543,29 @@ return; } case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__opencl_atomic_load: case AtomicExpr::AO__atomic_load_n: case AtomicExpr::AO__atomic_load: { llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr); - Load->setAtomic(Order); + Load->setAtomic(Order, Scope); Load->setVolatile(E->isVolatile()); CGF.Builder.CreateStore(Load, Dest); return; } case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__opencl_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: { llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1); llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr); - 
Store->setAtomic(Order); + Store->setAtomic(Order, Scope); Store->setVolatile(E->isVolatile()); return; } case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: Op = llvm::AtomicRMWInst::Xchg; @@ -564,6 +575,7 @@ PostOp = llvm::Instruction::Add; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: Op = llvm::AtomicRMWInst::Add; break; @@ -572,14 +584,26 @@ PostOp = llvm::Instruction::Sub; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: Op = llvm::AtomicRMWInst::Sub; break; + case AtomicExpr::AO__opencl_atomic_fetch_min: + Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min + : llvm::AtomicRMWInst::UMin; + break; + + case AtomicExpr::AO__opencl_atomic_fetch_max: + Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max + : llvm::AtomicRMWInst::UMax; + break; + case AtomicExpr::AO__atomic_and_fetch: PostOp = llvm::Instruction::And; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: Op = llvm::AtomicRMWInst::And; break; @@ -588,6 +612,7 @@ PostOp = llvm::Instruction::Or; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: Op = llvm::AtomicRMWInst::Or; break; @@ -596,6 +621,7 @@ PostOp = llvm::Instruction::Xor; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_xor: case AtomicExpr::AO__atomic_fetch_xor: Op = llvm::AtomicRMWInst::Xor; break; @@ -610,7 +636,7 @@ llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1); llvm::AtomicRMWInst *RMWI = - CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order); + CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope); RMWI->setVolatile(E->isVolatile()); // For __atomic_*_fetch operations, perform the operation again to @@ -677,19 +703,23 @@ Address Dest = Address::invalid(); Address Ptr(EmitScalarExpr(E->getPtr()), alignChars); - if (E->getOp() == AtomicExpr::AO__c11_atomic_init) { + if (E->getOp() == AtomicExpr::AO__c11_atomic_init || + E->getOp() == AtomicExpr::AO__opencl_atomic_init) { LValue lvalue = MakeAddrLValue(Ptr, AtomicTy); EmitAtomicInit(E->getVal1(), lvalue); return RValue::get(nullptr); } llvm::Value *Order = EmitScalarExpr(E->getOrder()); + llvm::Value *Scope = EmitScalarExpr(E->getScope()); switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: llvm_unreachable("Already handled above with EmitAtomicInit!"); case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__opencl_atomic_load: case AtomicExpr::AO__atomic_load_n: break; @@ -708,6 +738,8 @@ case AtomicExpr::AO__c11_atomic_compare_exchange_strong: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: case AtomicExpr::AO__atomic_compare_exchange_n: case AtomicExpr::AO__atomic_compare_exchange: Val1 = EmitPointerWithAlignment(E->getVal1()); @@ -716,12 +748,15 @@ else Val2 = EmitValToTemp(*this, E->getVal2()); OrderFail = EmitScalarExpr(E->getOrderFail()); - if (E->getNumSubExprs() == 6) + if (E->getOp() == 
AtomicExpr::AO__atomic_compare_exchange_n || + E->getOp() == AtomicExpr::AO__atomic_compare_exchange) IsWeak = EmitScalarExpr(E->getWeak()); break; case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_sub: if (MemTy->isPointerType()) { // For pointer arithmetic, we're required to do a bit of math: // adding 1 to an int* is not the same as adding 1 to a uintptr_t. @@ -744,11 +779,18 @@ case AtomicExpr::AO__atomic_sub_fetch: case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_store_n: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__c11_atomic_fetch_and: case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_xor: @@ -784,18 +826,26 @@ bool UseOptimizedLibcall = false; switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: llvm_unreachable("Already handled above with EmitAtomicInit!"); case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_nand: case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_xor: case AtomicExpr::AO__atomic_add_fetch: case AtomicExpr::AO__atomic_and_fetch: @@ -812,6 +862,11 @@ case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__opencl_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__atomic_load_n: case AtomicExpr::AO__atomic_load: case AtomicExpr::AO__atomic_store_n: @@ -833,7 +888,24 @@ getContext().getSizeType()); } // Atomic address is the first or second parameter - Args.add(RValue::get(EmitCastToVoidPtr(Ptr.getPointer())), + // The OpenCL atomic library functions only accept pointer arguments to + // generic address space. 
+ auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) { + if (!E->isOpenCL()) + return V; + auto AS = PT->getAs()->getPointeeType().getAddressSpace(); + if (AS == LangAS::opencl_generic) + return V; + auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic); + auto T = V->getType(); + auto *DestType = T->getPointerElementType()->getPointerTo(DestAS); + + return getTargetHooks().performAddrSpaceCast( + *this, V, AS, LangAS::opencl_generic, DestType, false); + }; + + Args.add(RValue::get(CastToGenericAddrSpace( + EmitCastToVoidPtr(Ptr.getPointer()), E->getPtr()->getType())), getContext().VoidPtrTy); std::string LibCallName; @@ -844,6 +916,7 @@ llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0; switch (E->getOp()) { case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: llvm_unreachable("Already handled!"); // There is only one libcall for compare an exchange, because there is no @@ -855,13 +928,17 @@ // int success, int failure) case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__atomic_compare_exchange: case AtomicExpr::AO__atomic_compare_exchange_n: LibCallName = "__atomic_compare_exchange"; RetTy = getContext().BoolTy; HaveRetTy = true; - Args.add(RValue::get(EmitCastToVoidPtr(Val1.getPointer())), - getContext().VoidPtrTy); + Args.add( + RValue::get(CastToGenericAddrSpace( + EmitCastToVoidPtr(Val1.getPointer()), E->getVal1()->getType())), + getContext().VoidPtrTy); AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(), MemTy, E->getExprLoc(), sizeChars); Args.add(RValue::get(Order), getContext().IntTy); @@ -871,6 +948,7 @@ // int order) // T __atomic_exchange_N(T *mem, T val, int order) case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: LibCallName = "__atomic_exchange"; @@ -880,6 +958,7 @@ // void __atomic_store(size_t size, void *mem, void *val, int order) // void __atomic_store_N(T *mem, T val, int order) case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__opencl_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: LibCallName = "__atomic_store"; @@ -891,6 +970,7 @@ // void __atomic_load(size_t size, void *mem, void *return, int order) // T __atomic_load_N(T *mem, int order) case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__opencl_atomic_load: case AtomicExpr::AO__atomic_load: case AtomicExpr::AO__atomic_load_n: LibCallName = "__atomic_load"; @@ -901,6 +981,7 @@ PostOp = llvm::Instruction::Add; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: LibCallName = "__atomic_fetch_add"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -912,6 +993,7 @@ PostOp = llvm::Instruction::And; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: LibCallName = "__atomic_fetch_and"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -923,6 +1005,7 @@ PostOp = llvm::Instruction::Or; // Fall through. 
case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: LibCallName = "__atomic_fetch_or"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -934,6 +1017,7 @@ PostOp = llvm::Instruction::Sub; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_sub: case AtomicExpr::AO__atomic_fetch_sub: LibCallName = "__atomic_fetch_sub"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -945,11 +1029,26 @@ PostOp = llvm::Instruction::Xor; // Fall through. case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_xor: case AtomicExpr::AO__atomic_fetch_xor: LibCallName = "__atomic_fetch_xor"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), MemTy, E->getExprLoc(), sizeChars); break; + case AtomicExpr::AO__opencl_atomic_fetch_min: + LibCallName = E->getValueType()->isSignedIntegerType() + ? "__atomic_fetch_min" + : "__atomic_fetch_umin"; + AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), + LoweredMemTy, E->getExprLoc(), sizeChars); + break; + case AtomicExpr::AO__opencl_atomic_fetch_max: + LibCallName = E->getValueType()->isSignedIntegerType() + ? "__atomic_fetch_max" + : "__atomic_fetch_umax"; + AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), + LoweredMemTy, E->getExprLoc(), sizeChars); + break; // T __atomic_nand_fetch_N(T *mem, T val, int order) // T __atomic_fetch_nand_N(T *mem, T val, int order) case AtomicExpr::AO__atomic_nand_fetch: @@ -962,6 +1061,11 @@ break; } + if (E->isOpenCL()) { + LibCallName = std::string("__opencl") + + StringRef(LibCallName).drop_front(1).str(); + + } // Optimized functions have the size in their name. 
  if (UseOptimizedLibcall)
    LibCallName += "_" + llvm::utostr(Size);
@@ -982,6 +1086,8 @@
 
   // order is always the last parameter
   Args.add(RValue::get(Order), getContext().IntTy);
+  if (E->isOpenCL())
+    Args.add(RValue::get(Scope), getContext().IntTy);
 
   // PostOp is only needed for the atomic_*_fetch operations, and
   // thus is only needed for and implemented in the
@@ -1018,12 +1124,20 @@
   }
 
   bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
+                 E->getOp() == AtomicExpr::AO__opencl_atomic_store ||
                  E->getOp() == AtomicExpr::AO__atomic_store ||
                  E->getOp() == AtomicExpr::AO__atomic_store_n;
   bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
+                E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
                 E->getOp() == AtomicExpr::AO__atomic_load ||
                 E->getOp() == AtomicExpr::AO__atomic_load_n;
 
+  assert(isa<llvm::ConstantInt>(Scope) &&
+         "Non-constant synchronization scope not supported");
+  auto SCID = getTargetHooks().getLLVMSyncScopeID(
+      static_cast<SyncScope>(cast<llvm::ConstantInt>(Scope)->getZExtValue()),
+      getLLVMContext());
+
   if (isa<llvm::ConstantInt>(Order)) {
     auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
     // We should not ever get to a case where the ordering isn't a valid C ABI
@@ -1032,30 +1146,30 @@
     switch ((llvm::AtomicOrderingCABI)ord) {
     case llvm::AtomicOrderingCABI::relaxed:
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::Monotonic);
+                   llvm::AtomicOrdering::Monotonic, SCID);
       break;
     case llvm::AtomicOrderingCABI::consume:
     case llvm::AtomicOrderingCABI::acquire:
       if (IsStore)
         break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::Acquire);
+                   llvm::AtomicOrdering::Acquire, SCID);
       break;
     case llvm::AtomicOrderingCABI::release:
       if (IsLoad)
        break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::Release);
+                   llvm::AtomicOrdering::Release, SCID);
       break;
     case llvm::AtomicOrderingCABI::acq_rel:
       if (IsLoad || IsStore)
         break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::AcquireRelease);
+                   llvm::AtomicOrdering::AcquireRelease, SCID);
       break;
     case llvm::AtomicOrderingCABI::seq_cst:
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::SequentiallyConsistent);
+                   llvm::AtomicOrdering::SequentiallyConsistent, SCID);
       break;
     }
     if (RValTy->isVoidType())
@@ -1091,13 +1205,13 @@
 
   // Emit all the different atomics
   Builder.SetInsertPoint(MonotonicBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-               Size, llvm::AtomicOrdering::Monotonic);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+               llvm::AtomicOrdering::Monotonic, SCID);
   Builder.CreateBr(ContBB);
   if (!IsStore) {
     Builder.SetInsertPoint(AcquireBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::Acquire);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::Acquire, SCID);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
@@ -1106,23 +1220,23 @@
   }
   if (!IsLoad) {
     Builder.SetInsertPoint(ReleaseBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::Release);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::Release, SCID);
     Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
                 ReleaseBB);
   }
   if (!IsLoad && !IsStore) {
     Builder.SetInsertPoint(AcqRelBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::AcquireRelease);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::AcquireRelease, SCID);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
                 AcqRelBB);
   }
   Builder.SetInsertPoint(SeqCstBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-               Size, llvm::AtomicOrdering::SequentiallyConsistent);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+               llvm::AtomicOrdering::SequentiallyConsistent, SCID);
   Builder.CreateBr(ContBB);
   SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
               SeqCstBB);
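Summarizing the libcall path above: for the OpenCL builtins the scope is appended as one extra trailing integer argument, and the library function name is rewritten from __atomic_* to __opencl_atomic_*. A sketch of the expected lowering on a libcall target such as spir64, mirroring test/CodeGenOpenCL/atomic-ops-libcall.cl below (i32 5 is memory_order_seq_cst, i32 1 is memory_scope_work_group; function name hypothetical):

  int load_it(atomic_int *p) {
    // expected: call i32 @__opencl_atomic_load_4(i8 addrspace(4)* {{.*}}, i32 5, i32 1)
    return __opencl_atomic_load(p, memory_order_seq_cst, memory_scope_work_group);
  }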
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -48,7 +48,7 @@
 llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) {
   unsigned addressSpace =
-    cast<llvm::PointerType>(value->getType())->getAddressSpace();
+      cast<llvm::PointerType>(value->getType())->getAddressSpace();
 
   llvm::PointerType *destType = Int8PtrTy;
   if (addressSpace)
Index: lib/CodeGen/TargetInfo.h
===================================================================
--- lib/CodeGen/TargetInfo.h
+++ lib/CodeGen/TargetInfo.h
@@ -19,6 +19,7 @@
 #include "CGValue.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/Basic/SyncScope.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 
@@ -260,6 +261,10 @@
   virtual llvm::Constant *
   performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *V, unsigned SrcAddr,
                        unsigned DestAddr, llvm::Type *DestTy) const;
+
+  /// Get the syncscope used in LLVM IR.
+  virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
+                                                 llvm::LLVMContext &C) const;
 };
 
 } // namespace CodeGen
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -444,6 +444,11 @@
   return llvm::ConstantExpr::getPointerCast(Src, DestTy);
 }
 
+llvm::SyncScope::ID
+TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const {
+  return C.getOrInsertSyncScopeID(""); /* default sync scope */
+}
+
 static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
 
 /// isEmptyField - Return true iff a the field is "empty", that is it
@@ -7430,6 +7435,8 @@
   }
   unsigned getGlobalVarAddressSpace(CodeGenModule &CGM,
                                     const VarDecl *D) const override;
+  llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
+                                         llvm::LLVMContext &C) const override;
 };
 }
 
@@ -7539,6 +7546,26 @@
   return DefaultGlobalAS;
 }
 
+llvm::SyncScope::ID
+AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S,
+                                            llvm::LLVMContext &C) const {
+  StringRef Name;
+  switch (S) {
+  case SyncScope::OpenCLWorkGroup:
+    Name = "workgroup";
+    break;
+  case SyncScope::OpenCLDevice:
+    Name = "agent";
+    break;
+  case SyncScope::OpenCLAllSVMDevices:
+    Name = "";
+    break;
+  case SyncScope::OpenCLSubGroup:
+    Name = "subgroup";
+  }
+  return C.getOrInsertSyncScopeID(Name);
+}
+
 //===----------------------------------------------------------------------===//
 // SPARC v8 ABI Implementation.
 // Based on the SPARC Compliance Definition version 2.4.1.
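The base getLLVMSyncScopeID implementation maps every scope to the default (system) sync scope, so only targets with scoped atomics need to override it. With the AMDGPU override above, the four OpenCL scopes are expected to lower as in this sketch, written in the style of the codegen test added below (function name hypothetical):

  void scopes(atomic_int *p) {
    __opencl_atomic_store(p, 1, memory_order_seq_cst, memory_scope_work_group);      // syncscope("workgroup")
    __opencl_atomic_store(p, 1, memory_order_seq_cst, memory_scope_device);          // syncscope("agent")
    __opencl_atomic_store(p, 1, memory_order_seq_cst, memory_scope_all_svm_devices); // default (system) scope
    __opencl_atomic_store(p, 1, memory_order_seq_cst, memory_scope_sub_group);       // syncscope("subgroup")
  }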
Index: lib/Frontend/InitPreprocessor.cpp
===================================================================
--- lib/Frontend/InitPreprocessor.cpp
+++ lib/Frontend/InitPreprocessor.cpp
@@ -14,6 +14,7 @@
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/SyncScope.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Version.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
@@ -575,6 +576,18 @@
   Builder.defineMacro("__ATOMIC_ACQ_REL", "4");
   Builder.defineMacro("__ATOMIC_SEQ_CST", "5");
 
+  // Define macros for the OpenCL memory scope.
+  // The values should match clang SyncScope enum.
+  assert(static_cast<unsigned>(SyncScope::OpenCLWorkGroup) == 1 &&
+         static_cast<unsigned>(SyncScope::OpenCLDevice) == 2 &&
+         static_cast<unsigned>(SyncScope::OpenCLAllSVMDevices) == 3 &&
+         static_cast<unsigned>(SyncScope::OpenCLSubGroup) == 4);
+  Builder.defineMacro("__OPENCL_MEMORY_SCOPE_WORK_ITEM", "0");
+  Builder.defineMacro("__OPENCL_MEMORY_SCOPE_WORK_GROUP", "1");
+  Builder.defineMacro("__OPENCL_MEMORY_SCOPE_DEVICE", "2");
+  Builder.defineMacro("__OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES", "3");
+  Builder.defineMacro("__OPENCL_MEMORY_SCOPE_SUB_GROUP", "4");
+
   // Support for #pragma redefine_extname (Sun compatibility)
   Builder.defineMacro("__PRAGMA_REDEFINE_EXTNAME", "1");
Index: lib/Headers/opencl-c.h
===================================================================
--- lib/Headers/opencl-c.h
+++ lib/Headers/opencl-c.h
@@ -13141,13 +13141,14 @@
 
 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
-typedef enum memory_scope
-{
-  memory_scope_work_item,
-  memory_scope_work_group,
-  memory_scope_device,
-  memory_scope_all_svm_devices,
-  memory_scope_sub_group
+typedef enum memory_scope {
+  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
+#endif
 } memory_scope;
 
 void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
@@ -13952,11 +13953,11 @@
 
 // enum values aligned with what clang uses in EmitAtomicExpr()
 typedef enum memory_order
 {
-  memory_order_relaxed,
-  memory_order_acquire,
-  memory_order_release,
-  memory_order_acq_rel,
-  memory_order_seq_cst
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
 } memory_order;
 
 // double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics
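Since the memory_scope and memory_order enumerators are now defined in terms of the predefined macros, the enum spelling and the raw macro value are interchangeable as builtin arguments. An illustrative equivalence, not part of the patch:

  __opencl_atomic_store(p, 1, memory_order_seq_cst, memory_scope_device);
  __opencl_atomic_store(p, 1, __ATOMIC_SEQ_CST, __OPENCL_MEMORY_SCOPE_DEVICE);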
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -25,6 +25,7 @@
 #include "clang/AST/StmtObjC.h"
 #include "clang/Analysis/Analyses/FormatString.h"
 #include "clang/Basic/CharInfo.h"
+#include "clang/Basic/SyncScope.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Lex/Lexer.h" // TODO: Extract static functions to fix layering.
@@ -2755,15 +2756,18 @@
   auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering;
   switch (Op) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     llvm_unreachable("There is no ordering argument for an init");
 
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
   case AtomicExpr::AO__atomic_load:
     return OrderingCABI != llvm::AtomicOrderingCABI::release &&
            OrderingCABI != llvm::AtomicOrderingCABI::acq_rel;
 
   case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n:
     return OrderingCABI != llvm::AtomicOrderingCABI::consume &&
@@ -2780,7 +2784,9 @@
   CallExpr *TheCall = cast<CallExpr>(TheCallResult.get());
   DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
 
-  // All these operations take one of the following forms:
+  // All the non-OpenCL operations take one of the following forms.
+  // The OpenCL operations take the __c11 forms with one extra argument for
+  // synchronization scope.
   enum {
     // C    __c11_atomic_init(A *, C)
     Init,
     // C    __c11_atomic_load(A *, int)
     Load,
     // void __atomic_load(A *, CP, int)
     LoadCopy,
     // void __atomic_store(A *, CP, int)
     Copy,
     // C    __c11_atomic_add(A *, M, int)
     Arithmetic,
     // C    __atomic_exchange_n(A *, CP, int)
     Xchg,
     // void __atomic_exchange(A *, C *, CP, int)
     GNUXchg,
     // bool __c11_atomic_compare_exchange_strong(A *, C *, CP, int, int)
     C11CmpXchg,
     // bool __atomic_compare_exchange(A *, C *, CP, bool, int, int)
     GNUCmpXchg
   } Form = Init;
+  const unsigned NumForm = GNUCmpXchg + 1;
   const unsigned NumArgs[] = { 2, 2, 3, 3, 3, 3, 4, 5, 6 };
   const unsigned NumVals[] = { 1, 0, 1, 1, 1, 1, 2, 2, 3 };
   // where:
@@ -2810,12 +2817,18 @@
   //   M is C if C is an integer, and ptrdiff_t if C is a pointer, and
   //   the int parameters are for orderings.
 
+  static_assert(sizeof(NumArgs)/sizeof(NumArgs[0]) == NumForm
+      && sizeof(NumVals)/sizeof(NumVals[0]) == NumForm,
+      "need to update code for modified forms");
   static_assert(AtomicExpr::AO__c11_atomic_init == 0 &&
                 AtomicExpr::AO__c11_atomic_fetch_xor + 1 ==
                     AtomicExpr::AO__atomic_load,
                 "need to update code for modified C11 atomics");
-  bool IsC11 = Op >= AtomicExpr::AO__c11_atomic_init &&
-               Op <= AtomicExpr::AO__c11_atomic_fetch_xor;
+  bool IsOpenCL = Op >= AtomicExpr::AO__opencl_atomic_init &&
+                  Op <= AtomicExpr::AO__opencl_atomic_fetch_max;
+  bool IsC11 = (Op >= AtomicExpr::AO__c11_atomic_init &&
+                Op <= AtomicExpr::AO__c11_atomic_fetch_xor) ||
+               IsOpenCL;
   bool IsN = Op == AtomicExpr::AO__atomic_load_n ||
              Op == AtomicExpr::AO__atomic_store_n ||
              Op == AtomicExpr::AO__atomic_exchange_n ||
             Op == AtomicExpr::AO__atomic_compare_exchange_n;
 
@@ -2824,10 +2837,12 @@
   switch (Op) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     Form = Init;
     break;
 
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
     Form = Load;
     break;
@@ -2837,6 +2852,7 @@
     break;
 
   case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n:
     Form = Copy;
     break;
 
@@ -2844,6 +2860,10 @@
   case AtomicExpr::AO__c11_atomic_fetch_add:
   case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_min:
+  case AtomicExpr::AO__opencl_atomic_fetch_max:
   case AtomicExpr::AO__atomic_fetch_add:
   case AtomicExpr::AO__atomic_fetch_sub:
   case AtomicExpr::AO__atomic_add_fetch:
@@ -2853,6 +2873,9 @@
   case AtomicExpr::AO__c11_atomic_fetch_and:
   case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_and:
+  case AtomicExpr::AO__opencl_atomic_fetch_or:
+  case AtomicExpr::AO__opencl_atomic_fetch_xor:
  case AtomicExpr::AO__atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_xor:
@@ -2865,6 +2888,7 @@
     break;
 
   case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__opencl_atomic_exchange:
   case AtomicExpr::AO__atomic_exchange_n:
     Form = Xchg;
     break;
@@ -2875,6 +2899,8 @@
 
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
     Form = C11CmpXchg;
     break;
 
@@ -2884,16 +2910,19 @@
     break;
   }
 
+  unsigned AdjustedNumArgs = NumArgs[Form];
+  if (IsOpenCL && Op != AtomicExpr::AO__opencl_atomic_init)
+    ++AdjustedNumArgs;
   // Check we have the right number of arguments.
-  if (TheCall->getNumArgs() < NumArgs[Form]) {
+  if (TheCall->getNumArgs() < AdjustedNumArgs) {
     Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
-      << 0 << NumArgs[Form] << TheCall->getNumArgs()
+      << 0 << AdjustedNumArgs << TheCall->getNumArgs()
       << TheCall->getCallee()->getSourceRange();
     return ExprError();
-  } else if (TheCall->getNumArgs() > NumArgs[Form]) {
-    Diag(TheCall->getArg(NumArgs[Form])->getLocStart(),
+  } else if (TheCall->getNumArgs() > AdjustedNumArgs) {
+    Diag(TheCall->getArg(AdjustedNumArgs)->getLocStart(),
          diag::err_typecheck_call_too_many_args)
-      << 0 << NumArgs[Form] << TheCall->getNumArgs()
+      << 0 << AdjustedNumArgs << TheCall->getNumArgs()
       << TheCall->getCallee()->getSourceRange();
     return ExprError();
   }
@@ -2921,9 +2950,11 @@
       << Ptr->getType() << Ptr->getSourceRange();
     return ExprError();
   }
-  if (AtomTy.isConstQualified()) {
+  if (AtomTy.isConstQualified() ||
+      AtomTy.getAddressSpace() == LangAS::opencl_constant) {
     Diag(DRE->getLocStart(), diag::err_atomic_op_needs_non_const_atomic)
-      << Ptr->getType() << Ptr->getSourceRange();
+        << (AtomTy.isConstQualified() ? 0 : 1) << Ptr->getType()
+        << Ptr->getSourceRange();
     return ExprError();
   }
   ValType = AtomTy->getAs<AtomicType>()->getValueType();
@@ -2992,7 +3023,8 @@
   ValType.removeLocalVolatile();
   ValType.removeLocalConst();
   QualType ResultType = ValType;
-  if (Form == Copy || Form == LoadCopy || Form == GNUXchg || Form == Init)
+  if (Form == Copy || Form == LoadCopy || Form == GNUXchg ||
+      Form == Init)
     ResultType = Context.VoidTy;
   else if (Form == C11CmpXchg || Form == GNUCmpXchg)
     ResultType = Context.BoolTy;
@@ -3006,7 +3038,7 @@
   // The first argument --- the pointer --- has a fixed type; we
   // deduce the types of the rest of the arguments accordingly. Walk
   // the remaining arguments, converting them to the deduced value type.
-  for (unsigned i = 1; i != NumArgs[Form]; ++i) {
+  for (unsigned i = 1; i != TheCall->getNumArgs(); ++i) {
     QualType Ty;
     if (i < NumVals[Form] + 1) {
       switch (i) {
@@ -3048,7 +3080,7 @@
         break;
       }
     } else {
-      // The order(s) are always converted to int.
+      // The order(s) and scope are always converted to int.
      Ty = Context.IntTy;
     }
 
@@ -3061,6 +3093,27 @@
     TheCall->setArg(i, Arg.get());
   }
 
+  Expr *Scope;
+  if (Form != Init) {
+    if (IsOpenCL) {
+      Scope = TheCall->getArg(TheCall->getNumArgs() - 1);
+      llvm::APSInt Result(32);
+      if (!Scope->isIntegerConstantExpr(Result, Context))
+        Diag(Scope->getLocStart(),
+             diag::err_atomic_op_has_non_constant_synch_scope)
+            << Scope->getSourceRange();
+      else if (!isValidSyncScopeValue(Result.getZExtValue()))
+        Diag(Scope->getLocStart(), diag::err_atomic_op_has_invalid_synch_scope)
+            << Scope->getSourceRange();
+    } else {
+      Scope = IntegerLiteral::Create(
+          Context,
+          llvm::APInt(Context.getTypeSize(Context.IntTy),
+                      static_cast<unsigned>(SyncScope::OpenCLAllSVMDevices)),
+          Context.IntTy, SourceLocation());
+    }
+  }
+
   // Permute the arguments into a 'consistent' order.
   SmallVector<Expr *, 8> SubExprs;
   SubExprs.push_back(Ptr);
   switch (Form) {
@@ -3071,28 +3124,33 @@
     break;
   case Load:
     SubExprs.push_back(TheCall->getArg(1)); // Order
+    SubExprs.push_back(Scope);              // Scope
     break;
   case LoadCopy:
   case Copy:
   case Arithmetic:
   case Xchg:
     SubExprs.push_back(TheCall->getArg(2)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     break;
   case GNUXchg:
     // Note, AtomicExpr::getVal2() has a special case for this atomic.
     SubExprs.push_back(TheCall->getArg(3)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(2)); // Val2
     break;
   case C11CmpXchg:
     SubExprs.push_back(TheCall->getArg(3)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(4)); // OrderFail
     SubExprs.push_back(TheCall->getArg(2)); // Val2
     break;
   case GNUCmpXchg:
     SubExprs.push_back(TheCall->getArg(4)); // Order
+    SubExprs.push_back(Scope);              // Scope
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(5)); // OrderFail
     SubExprs.push_back(TheCall->getArg(2)); // Val2
@@ -3114,10 +3172,14 @@
                                         TheCall->getRParenLoc());
 
   if ((Op == AtomicExpr::AO__c11_atomic_load ||
-       (Op == AtomicExpr::AO__c11_atomic_store)) &&
+       Op == AtomicExpr::AO__c11_atomic_store ||
+       Op == AtomicExpr::AO__opencl_atomic_load ||
+       Op == AtomicExpr::AO__opencl_atomic_store) &&
       Context.AtomicUsesUnsupportedLibcall(AE))
-    Diag(AE->getLocStart(), diag::err_atomic_load_store_uses_lib) <<
-    ((Op == AtomicExpr::AO__c11_atomic_load) ? 0 : 1);
+    Diag(AE->getLocStart(), diag::err_atomic_load_store_uses_lib)
+        << ((Op == AtomicExpr::AO__c11_atomic_load ||
+             Op == AtomicExpr::AO__opencl_atomic_load)
+                ? 0 : 1);
 
   return AE;
 }
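As a worked example of the permuted order built above: for a call such as

  __opencl_atomic_compare_exchange_strong(p, &expected, desired,
                                          success_order, failure_order, scope);

the C11CmpXchg case stores the sub-expressions as {p, success_order, scope, &expected, failure_order, desired}, matching the PTR, ORDER, SCOPE, VAL1, ORDER_FAIL, VAL2 layout in Expr.h and NumSubExprs == 6 from AtomicExpr::getNumSubExprs for the __opencl compare-exchange ops.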
Index: test/CodeGenOpenCL/atomic-ops-libcall.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/atomic-ops-libcall.cl
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple spir64 -emit-llvm | FileCheck -check-prefix=SPIR %s
+// RUN: %clang_cc1 < %s -cl-std=CL2.0 -finclude-default-header -triple armv5e-none-linux-gnueabi -emit-llvm | FileCheck -check-prefix=ARM %s
+
+void f(atomic_int *i, atomic_uint *ui, int cmp) {
+  int x;
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8 addrspace(4)* {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8* {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_fetch_add(i, 3, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_fetch_min(i, 3, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
+  x = __opencl_atomic_fetch_min(ui, 3, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  x = __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 2)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 2)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_device);
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 3)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* {{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 3)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_all_svm_devices);
+#ifdef cl_khr_subgroups
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 4)
+  x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_sub_group);
+#endif
+}
Index: test/CodeGenOpenCL/atomic-ops.cl
===================================================================
--- test/CodeGenOpenCL/atomic-ops.cl
+++ test/CodeGenOpenCL/atomic-ops.cl
@@ -0,0 +1,176 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -finclude-default-header -O0 -o - -triple=amdgcn-amd-amdhsa-opencl | FileCheck %s
+
+// Also test serialization of atomic operations here, to avoid duplicating the test.
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-opencl
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | FileCheck %s
+
+#ifndef ALREADY_INCLUDED
+#define ALREADY_INCLUDED
+
+atomic_int j;
+
+void fi1(atomic_int *i) {
+  // CHECK-LABEL: @fi1
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} seq_cst
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
+  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst
+  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
+}
+
+void fi2(atomic_int *i) {
+  // CHECK-LABEL: @fi2
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+}
+
+void fi3(atomic_int *i, atomic_uint *ui) {
+  // CHECK-LABEL: @fi3
+  // CHECK: atomicrmw and i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  // CHECK: atomicrmw min i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  // CHECK: atomicrmw max i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  // CHECK: atomicrmw umin i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);
+  // CHECK: atomicrmw umax i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
+}
+
+bool fi4(atomic_int *i) {
+  // CHECK-LABEL: @fi4(
+  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32 addrspace(4)* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
+  // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
+  // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
+  // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
+  // CHECK: store i32 [[OLD]]
+  int cmp = 0;
+  return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
+}
+
+float ff1(global atomic_float *d) {
+  // CHECK-LABEL: @ff1
+  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic
+  return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
+}
+
+void ff2(atomic_float *d) {
+  // CHECK-LABEL: @ff2
+  // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release
+  __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
+}
+
+float ff3(atomic_float *d) {
+  // CHECK-LABEL: @ff3
+  // CHECK: atomicrmw xchg i32 addrspace(4)* {{.*}} syncscope("workgroup") seq_cst
+  return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
+}
+
+// CHECK-LABEL: @atomic_init_foo
+void atomic_init_foo()
+{
+  // CHECK-NOT: atomic
+  // CHECK: store
+  __opencl_atomic_init(&j, 42);
+
+  // CHECK-NOT: atomic
+  // CHECK: }
+}
+
+// CHECK-LABEL: @failureOrder
+void failureOrder(atomic_int *ptr, int *ptr2) {
+  // CHECK: cmpxchg i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
+  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
+
+  // CHECK: cmpxchg weak i32 addrspace(4)* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
+  __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
+}
+
+// CHECK-LABEL: @generalFailureOrder
+void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
+  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group);
+  // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
+  // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
+
+  // CHECK: [[MONOTONIC]]
+  // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: ]
+
+  // CHECK: [[ACQUIRE]]
+  // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[RELEASE]]
+  // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: ]
+
+  // CHECK: [[ACQREL]]
+  // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[SEQCST]]
+  // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
+  // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
+  // CHECK-NEXT: ]
+
+  // CHECK: [[MONOTONIC_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} monotonic monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQUIRE_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} acquire monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQUIRE_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} acquire acquire
+  // CHECK: br
+
+  // CHECK: [[ACQREL_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} acq_rel monotonic
+  // CHECK: br
+
+  // CHECK: [[ACQREL_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} acq_rel acquire
+  // CHECK: br
+
+  // CHECK: [[SEQCST_MONOTONIC]]
+  // CHECK: cmpxchg {{.*}} seq_cst monotonic
+  // CHECK: br
+
+  // CHECK: [[SEQCST_ACQUIRE]]
+  // CHECK: cmpxchg {{.*}} seq_cst acquire
+  // CHECK: br
+
+  // CHECK: [[SEQCST_SEQCST]]
+  // CHECK: cmpxchg {{.*}} seq_cst seq_cst
+  // CHECK: br
+}
+
+int test_volatile(volatile atomic_int *i) {
+  // CHECK-LABEL: @test_volatile
+  // CHECK: %[[i_addr:.*]] = alloca i32
+  // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
+  // CHECK-NEXT: store i32 addrspace(4)* %i, i32 addrspace(4)** %[[i_addr]]
+  // CHECK-NEXT: %[[addr:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[i_addr]]
+  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32 addrspace(4)* %[[addr]] syncscope("workgroup") seq_cst
+  // CHECK-NEXT: store i32 %[[res]], i32* %[[atomicdst]]
+  // CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]]
+  // CHECK-NEXT: ret i32 %[[retval]]
+  return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+}
+
+#endif
Index: test/Preprocessor/init.c
===================================================================
--- test/Preprocessor/init.c
+++ test/Preprocessor/init.c
@@ -9177,6 +9177,11 @@
 // WEBASSEMBLY32-NOT:#define __LP64__
 // WEBASSEMBLY32-NEXT:#define __NO_INLINE__ 1
 // WEBASSEMBLY32-NEXT:#define __OBJC_BOOL_IS_BOOL 0
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
+// WEBASSEMBLY32-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
 // WEBASSEMBLY32-NEXT:#define __ORDER_BIG_ENDIAN__ 4321
 // WEBASSEMBLY32-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234
 // WEBASSEMBLY32-NEXT:#define __ORDER_PDP_ENDIAN__ 3412
@@ -9503,6 +9508,11 @@
 // WEBASSEMBLY64-NEXT:#define __LP64__ 1
 // WEBASSEMBLY64-NEXT:#define __NO_INLINE__ 1
 // WEBASSEMBLY64-NEXT:#define __OBJC_BOOL_IS_BOOL 0
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
+// WEBASSEMBLY64-NEXT:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
 // WEBASSEMBLY64-NEXT:#define __ORDER_BIG_ENDIAN__ 4321
 // WEBASSEMBLY64-NEXT:#define __ORDER_LITTLE_ENDIAN__ 1234
 // WEBASSEMBLY64-NEXT:#define __ORDER_PDP_ENDIAN__ 3412
Index: test/Preprocessor/predefined-macros.c
===================================================================
--- test/Preprocessor/predefined-macros.c
+++ test/Preprocessor/predefined-macros.c
@@ -185,6 +185,14 @@
 // CHECK-CL20-NOT: #define __FAST_RELAXED_MATH__ 1
 // CHECK-FRM: #define __FAST_RELAXED_MATH__ 1
 
+// RUN: %clang_cc1 %s -E -dM -o - -x cl \
+// RUN:   | FileCheck %s --check-prefix=MSCOPE
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES 3
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_DEVICE 2
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_SUB_GROUP 4
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
+// MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
+
 // RUN: %clang_cc1 -triple aarch64-windows %s -E -dM -o - -x cl \
 // RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-ARM64-WIN
Index: test/SemaOpenCL/atomic-ops.cl
===================================================================
--- test/SemaOpenCL/atomic-ops.cl
+++ test/SemaOpenCL/atomic-ops.cl
@@ -0,0 +1,161 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -verify -fsyntax-only -triple=spir64
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -verify -fsyntax-only -triple=amdgcn-amd-amdhsa-opencl
+
+// Basic parsing/Sema tests for __opencl_atomic_*
+
+#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
+
+struct S { char c[3]; };
+
+char i8;
+short i16;
+int i32;
+int8 i64;
+
+atomic_int gn;
+
+void f(atomic_int *i, const atomic_int *ci,
+       atomic_intptr_t *p, atomic_float *d,
+       int *I, const int *CI,
+       intptr_t *P, float *D, struct S *s1, struct S *s2,
+       global atomic_int *i_g, local atomic_int *i_l, private atomic_int *i_p,
+       constant atomic_int *i_c) {
+  __opencl_atomic_init(I, 5); // expected-error {{address argument to atomic operation must be a pointer to _Atomic type ('__generic int *' invalid)}}
+  __opencl_atomic_init(ci, 5); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_load(0); // expected-error {{too few arguments to function call, expected 3, have 1}}
+  __opencl_atomic_load(0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}}
+  __opencl_atomic_store(0,0,0,0); // expected-error {{address argument to atomic builtin must be a pointer}}
+  __opencl_atomic_store((int *)0, 0, 0, 0); // expected-error {{address argument to atomic operation must be a pointer to _Atomic type ('__generic int *' invalid)}}
+  __opencl_atomic_store(i, 0, memory_order_relaxed, memory_scope_work_group);
+  __opencl_atomic_store(ci, 0, memory_order_relaxed, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+  __opencl_atomic_store(i_g, 0, memory_order_relaxed, memory_scope_work_group);
+  __opencl_atomic_store(i_l, 0, memory_order_relaxed, memory_scope_work_group);
+  __opencl_atomic_store(i_p, 0, memory_order_relaxed, memory_scope_work_group);
+  __opencl_atomic_store(i_c, 0, memory_order_relaxed, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-constant _Atomic type ('__constant atomic_int *' (aka '__constant _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_load(p, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_load(d, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_load(ci, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_store(p, 1, memory_order_seq_cst, memory_scope_work_group);
+  (int)__opencl_atomic_store(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{operand of type 'void' where arithmetic or pointer type is required}}
+
+  int exchange_1 = __opencl_atomic_exchange(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  int exchange_2 = __opencl_atomic_exchange(I, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to _Atomic}}
+
+  __opencl_atomic_fetch_add(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_add(p, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_add(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+  __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_and(p, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_and(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to bitwise atomic operation must be a pointer to atomic integer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+
+  __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
+  __opencl_atomic_fetch_min(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+  __opencl_atomic_fetch_max(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+
+  bool cmpexch_1 = __opencl_atomic_compare_exchange_strong(i, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  bool cmpexch_2 = __opencl_atomic_compare_exchange_strong(p, P, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  bool cmpexch_3 = __opencl_atomic_compare_exchange_strong(d, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{incompatible pointer types passing '__generic int *' to parameter of type '__generic float *'}}
+  (void)__opencl_atomic_compare_exchange_strong(i, CI, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{passing 'const __generic int *' to parameter of type '__generic int *' discards qualifiers}}
+
+  bool cmpexchw_1 = __opencl_atomic_compare_exchange_weak(i, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  bool cmpexchw_2 = __opencl_atomic_compare_exchange_weak(p, P, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+  bool cmpexchw_3 = __opencl_atomic_compare_exchange_weak(d, I, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{incompatible pointer types passing '__generic int *' to parameter of type '__generic float *'}}
+  (void)__opencl_atomic_compare_exchange_weak(i, CI, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group); // expected-warning {{passing 'const __generic int *' to parameter of type '__generic int *' discards qualifiers}}
+
+  // Pointers to different address spaces are allowed.
+  bool cmpexch_10 = __opencl_atomic_compare_exchange_strong((global atomic_int *)0x308, (constant int *)0x309, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+
+  __opencl_atomic_init(ci, 0); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+  __opencl_atomic_store(ci, 0, memory_order_release, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+  __opencl_atomic_load(ci, memory_order_acquire, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const __generic atomic_int *' (aka 'const __generic _Atomic(int) *') invalid)}}
+
+  __opencl_atomic_init(&gn, 456);
+  __opencl_atomic_init(&gn, (void*)0); // expected-warning{{incompatible pointer to integer conversion passing '__generic void *' to parameter of type 'int'}}
+}
+
+void memory_checks(atomic_int *Ap, int *p, int val) {
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_load(Ap, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_load(Ap, memory_order_consume, memory_scope_work_group); // expected-error {{use of undeclared identifier 'memory_order_consume'}}
+  (void)__opencl_atomic_load(Ap, memory_order_release, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_load(Ap, memory_order_acq_rel, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_load(Ap, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_store(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_store(Ap, val, memory_order_acquire, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_store(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_store(Ap, val, memory_order_acq_rel, memory_scope_work_group); // expected-warning {{memory order argument to atomic operation is invalid}}
+  (void)__opencl_atomic_store(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_add(Ap, 1, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_init(Ap, val);
+
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_sub(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_and(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_or(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_fetch_xor(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_acquire, memory_scope_work_group);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_release, memory_scope_work_group);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_acq_rel, memory_scope_work_group);
+  (void)__opencl_atomic_exchange(Ap, val, memory_order_seq_cst, memory_scope_work_group);
+
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_release, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_strong(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_group);
+
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_release, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_acq_rel, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_compare_exchange_weak(Ap, p, val, memory_order_seq_cst, memory_order_relaxed, memory_scope_work_group);
+}
+
+void synchscope_checks(atomic_int *Ap, int scope) {
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_item); // expected-error{{synchronization scope argument to atomic operation is invalid}}
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_work_group);
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_device);
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_all_svm_devices);
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, memory_scope_sub_group);
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, scope); // expected-error{{non-constant synchronization scope argument to atomic operation is not supported}}
+  (void)__opencl_atomic_load(Ap, memory_order_relaxed, 10); // expected-error{{synchronization scope argument to atomic operation is invalid}}
+}
+
+void nullPointerWarning(atomic_int *Ap, int *p, int val) {
+  // The 'expected' pointer shouldn't be NULL.
+  (void)__opencl_atomic_compare_exchange_strong(Ap, NULL, val, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group); // expected-warning {{null passed to a callee that requires a non-null argument}}
+}
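
Note for readers: below is a minimal usage sketch of the builtins this patch adds, not part of the change itself. It assumes -cl-std=CL2.0 with the default header (for atomic_int, memory_order_*, and memory_scope_*, as the tests above use via -finclude-default-header); the kernel names are illustrative only.

  // Sketch only. Device-scope relaxed increment: the update must be made
  // visible to all work-items on this device, but not to other devices.
  kernel void bump(global atomic_int *counter, global int *out) {
    int old = __opencl_atomic_fetch_add(counter, 1, memory_order_relaxed,
                                        memory_scope_device);
    out[get_global_id(0)] = old;
  }

  // Sketch only. Work-group-scope CAS loop: the weak form may fail
  // spuriously, and 'cur' is refreshed with the observed value on failure.
  kernel void set_max(global atomic_int *m, int v) {
    int cur = __opencl_atomic_load(m, memory_order_relaxed,
                                   memory_scope_work_group);
    while (cur < v &&
           !__opencl_atomic_compare_exchange_weak(m, &cur, v,
                                                  memory_order_relaxed,
                                                  memory_order_relaxed,
                                                  memory_scope_work_group))
      ;
  }

Each builtin mirrors the corresponding C11 _explicit form with one extra trailing synchronization-scope argument, which is exactly the surface the Sema and CodeGen tests above pin down.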