Index: docs/LanguageExtensions.rst =================================================================== --- docs/LanguageExtensions.rst +++ docs/LanguageExtensions.rst @@ -1975,6 +1975,63 @@ Support for constant expression evaluation for the above builtins be detected with ``__has_feature(cxx_constexpr_string_builtins)``. +Atomic Min/Max builtins with memory ordering +-------------------------------------------- + +There are two atomic builtins with min/max in-memory comparison and swap. +The syntax and semantics are similar to GCC-compatible ``__atomic_*`` builtins. + +* ``__atomic_fetch_min`` +* ``__atomic_fetch_max`` + +The builtins work with signed and unsigned integers and require a memory ordering to be specified. +The return value is the original value that was stored in memory before comparison. + +Example: + +.. code-block:: c + + unsigned int val = __atomic_fetch_min(unsigned int *pi, unsigned int ui, __ATOMIC_RELAXED); + +The third argument defines the memory model. There are six memory models that can +be specified. This makes it possible to balance performance against the +necessary guarantees. + +* ``__ATOMIC_RELAXED`` + + The weakest level of atomicity. It does not provide any general synchronization. + It guarantees that if you take all the operations affecting a specific address, + a consistent ordering exists. An optimizer is allowed to reorder non-atomic + and Unordered loads around atomic-relaxed operations. But it can't split an + atomic operation into multiple sub-operations. + +* ``__ATOMIC_CONSUME`` + + Data dependency only for both barrier and synchronization with another thread. + The non-dependent shared variables may be moved around the atomic-consume + operations. When two threads synchronize in sequentially consistent mode, + all the visible variables must be flushed through the system so that all threads + see the same state. 
+ +* ``__ATOMIC_ACQUIRE`` + + Provides a barrier for hoisting of code and synchronizes with release (or stronger) + semantic stores from another thread. +* ``__ATOMIC_RELEASE`` + + Provides a barrier for sinking of code and synchronizes with acquire (or stronger) + semantic loads from another thread. +* ``__ATOMIC_ACQ_REL`` + + Provides a full barrier in both directions and synchronizes with acquire loads + and release stores in another thread. + +* ``__ATOMIC_SEQ_CST`` + + Provides a full barrier in both directions and synchronizes with acquire loads + and release stores in all threads. It guarantees that a total ordering exists + between all sequentially-consistent operations. + .. _langext-__c11_atomic: __c11_atomic builtins @@ -2734,4 +2791,3 @@ The ``#pragma comment(lib, ...)`` directive is supported on all ELF targets. The second parameter is the library name (without the traditional Unix prefix of ``lib``). This allows you to provide an implicit link of dependent libraries. - Index: include/clang/Basic/Builtins.def =================================================================== --- include/clang/Basic/Builtins.def +++ include/clang/Basic/Builtins.def @@ -721,6 +721,10 @@ ATOMIC_BUILTIN(__opencl_atomic_fetch_min, "v.", "t") ATOMIC_BUILTIN(__opencl_atomic_fetch_max, "v.", "t") +// GCC does not support these, they are a Clang extension. +ATOMIC_BUILTIN(__atomic_fetch_min, "iiD*i.", "t") +ATOMIC_BUILTIN(__atomic_fetch_max, "v.", "t") + #undef ATOMIC_BUILTIN // Non-overloaded atomic builtins. 
Index: include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- include/clang/Basic/DiagnosticSemaKinds.td +++ include/clang/Basic/DiagnosticSemaKinds.td @@ -7114,6 +7114,8 @@ def err_atomic_op_needs_atomic_int_or_ptr : Error< "address argument to atomic operation must be a pointer to %select{|atomic }0" "integer or pointer (%1 invalid)">; +def err_atomic_op_needs_int32_or_ptr : Error< + "address argument to atomic operation must be a pointer to signed or unsigned 32-bit integer">; def err_atomic_op_bitwise_needs_atomic_int : Error< "address argument to bitwise atomic operation must be a pointer to " "%select{|atomic }0integer (%1 invalid)">; Index: lib/AST/Expr.cpp =================================================================== --- lib/AST/Expr.cpp +++ lib/AST/Expr.cpp @@ -4051,6 +4051,8 @@ case AO__atomic_or_fetch: case AO__atomic_xor_fetch: case AO__atomic_nand_fetch: + case AO__atomic_fetch_min: + case AO__atomic_fetch_max: return 3; case AO__opencl_atomic_store: Index: lib/CodeGen/CGAtomic.cpp =================================================================== --- lib/CodeGen/CGAtomic.cpp +++ lib/CodeGen/CGAtomic.cpp @@ -591,11 +591,13 @@ break; case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__atomic_fetch_min: Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min : llvm::AtomicRMWInst::UMin; break; case AtomicExpr::AO__opencl_atomic_fetch_max: + case AtomicExpr::AO__atomic_fetch_max: Op = E->getValueType()->isSignedIntegerType() ? 
llvm::AtomicRMWInst::Max : llvm::AtomicRMWInst::UMax; break; @@ -859,6 +861,8 @@ case AtomicExpr::AO__atomic_or_fetch: case AtomicExpr::AO__atomic_xor_fetch: case AtomicExpr::AO__atomic_nand_fetch: + case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__atomic_fetch_max: Val1 = EmitValToTemp(*this, E->getVal1()); break; } @@ -913,6 +917,8 @@ case AtomicExpr::AO__atomic_or_fetch: case AtomicExpr::AO__atomic_sub_fetch: case AtomicExpr::AO__atomic_xor_fetch: + case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__atomic_fetch_max: // For these, only library calls for certain sizes exist. UseOptimizedLibcall = true; break; @@ -1095,6 +1101,7 @@ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), MemTy, E->getExprLoc(), sizeChars); break; + case AtomicExpr::AO__atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: LibCallName = E->getValueType()->isSignedIntegerType() ? "__atomic_fetch_min" @@ -1102,6 +1109,7 @@ AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), LoweredMemTy, E->getExprLoc(), sizeChars); break; + case AtomicExpr::AO__atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: LibCallName = E->getValueType()->isSignedIntegerType() ? 
"__atomic_fetch_max" Index: lib/Sema/SemaChecking.cpp =================================================================== --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -3037,6 +3037,7 @@ Op == AtomicExpr::AO__atomic_exchange_n || Op == AtomicExpr::AO__atomic_compare_exchange_n; bool IsAddSub = false; + bool IsMinMax = false; switch (Op) { case AtomicExpr::AO__c11_atomic_init: @@ -3090,6 +3091,12 @@ Form = Arithmetic; break; + case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__atomic_fetch_max: + IsMinMax = true; + Form = Arithmetic; + break; + case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: @@ -3172,12 +3179,21 @@ // For an arithmetic operation, the implied arithmetic must be well-formed. if (Form == Arithmetic) { // gcc does not enforce these rules for GNU atomics, but we do so for sanity. - if (IsAddSub && !ValType->isIntegerType() && !ValType->isPointerType()) { + if (IsAddSub && !ValType->isIntegerType() + && !ValType->isPointerType()) { Diag(DRE->getLocStart(), diag::err_atomic_op_needs_atomic_int_or_ptr) << IsC11 << Ptr->getType() << Ptr->getSourceRange(); return ExprError(); } - if (!IsAddSub && !ValType->isIntegerType()) { + if (IsMinMax) { + const BuiltinType *BT = ValType->getAs<BuiltinType>(); + if (!BT || (BT->getKind() != BuiltinType::Int && + BT->getKind() != BuiltinType::UInt)) { + Diag(DRE->getLocStart(), diag::err_atomic_op_needs_int32_or_ptr); + return ExprError(); + } + } + if (!IsAddSub && !IsMinMax && !ValType->isIntegerType()) { Diag(DRE->getLocStart(), diag::err_atomic_op_bitwise_needs_atomic_int) << IsC11 << Ptr->getType() << Ptr->getSourceRange(); return ExprError(); Index: test/CodeGen/Atomics.c =================================================================== --- test/CodeGen/Atomics.c +++ test/CodeGen/Atomics.c @@ -291,3 +291,10 @@ __sync_lock_release (&sll); // CHECK: store atomic {{.*}} release, align 8 __sync_lock_release (&ull); // CHECK: 
store atomic {{.*}} release, align 8 } + +void test_atomic(void) { + ui = __atomic_fetch_min(&ui, 5, __ATOMIC_RELAXED); // CHECK: atomicrmw umin {{.*}} monotonic + si = __atomic_fetch_min(&si, 5, __ATOMIC_SEQ_CST); // CHECK: atomicrmw min {{.*}} seq_cst + ui = __atomic_fetch_max(&ui, 5, __ATOMIC_ACQUIRE); // CHECK: atomicrmw umax {{.*}} acquire + si = __atomic_fetch_max(&si, 5, __ATOMIC_RELEASE); // CHECK: atomicrmw max {{.*}} release +} Index: test/Sema/atomic-ops.c =================================================================== --- test/Sema/atomic-ops.c +++ test/Sema/atomic-ops.c @@ -173,6 +173,9 @@ __atomic_fetch_sub(P, 3, memory_order_seq_cst); __atomic_fetch_sub(D, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer or pointer}} __atomic_fetch_sub(s1, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer or pointer}} + __atomic_fetch_min(D, 3, memory_order_seq_cst); // expected-error {{must be a pointer to signed or unsigned 32-bit integer}} + __atomic_fetch_max(P, 3, memory_order_seq_cst); // expected-error {{must be a pointer to signed or unsigned 32-bit integer}} + __atomic_fetch_max(p, 3); // expected-error {{too few arguments to function call, expected 3, have 2}} __c11_atomic_fetch_and(i, 1, memory_order_seq_cst); __c11_atomic_fetch_and(p, 1, memory_order_seq_cst); // expected-error {{must be a pointer to atomic integer}} @@ -456,6 +459,20 @@ (void)__atomic_fetch_nand(p, val, memory_order_acq_rel); (void)__atomic_fetch_nand(p, val, memory_order_seq_cst); + (void)__atomic_fetch_min(p, val, memory_order_relaxed); + (void)__atomic_fetch_min(p, val, memory_order_acquire); + (void)__atomic_fetch_min(p, val, memory_order_consume); + (void)__atomic_fetch_min(p, val, memory_order_release); + (void)__atomic_fetch_min(p, val, memory_order_acq_rel); + (void)__atomic_fetch_min(p, val, memory_order_seq_cst); + + (void)__atomic_fetch_max(p, val, memory_order_relaxed); + (void)__atomic_fetch_max(p, val, 
memory_order_acquire); + (void)__atomic_fetch_max(p, val, memory_order_consume); + (void)__atomic_fetch_max(p, val, memory_order_release); + (void)__atomic_fetch_max(p, val, memory_order_acq_rel); + (void)__atomic_fetch_max(p, val, memory_order_seq_cst); + (void)__atomic_and_fetch(p, val, memory_order_relaxed); (void)__atomic_and_fetch(p, val, memory_order_acquire); (void)__atomic_and_fetch(p, val, memory_order_consume);