diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -153,6 +153,11 @@
 TARGET_HEADER_BUILTIN(_InterlockedCompareExchange64_nf, "LLiLLiD*LLiLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_InterlockedCompareExchange64_rel, "LLiLLiD*LLiLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 
+TARGET_HEADER_BUILTIN(_InterlockedCompareExchange128, "UcLLiD*LLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_InterlockedCompareExchange128_acq, "UcLLiD*LLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_InterlockedCompareExchange128_nf, "UcLLiD*LLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_InterlockedCompareExchange128_rel, "UcLLiD*LLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+
 TARGET_HEADER_BUILTIN(_InterlockedOr8_acq, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_InterlockedOr8_nf, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_InterlockedOr8_rel, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
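
A note on the builtin signature: the type string "UcLLiD*LLiLLiLLi*" decodes
under the key documented at the top of clang/include/clang/Basic/Builtins.def
(Uc = unsigned char, LLi = long long, a trailing D marks the preceding type
volatile, * makes it a pointer). Spelled out as C, it is exactly the prototype
this patch later declares in intrin.h:

    /* Decoded form of "UcLLiD*LLiLLiLLi*": the return type comes first,
       then the four parameter types. */
    unsigned char
    _InterlockedCompareExchange128(__int64 volatile *_Destination, /* LLiD* */
                                   __int64 _ExchangeHigh,          /* LLi   */
                                   __int64 _ExchangeLow,           /* LLi   */
                                   __int64 *_ComparandResult);     /* LLi*  */
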
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -304,6 +304,10 @@
       AtomicOrdering::Monotonic : SuccessOrdering;
 
+  // The atomic instruction is marked volatile for consistency with MSVC. This
+  // blocks the few atomics optimizations that LLVM has. If we want to optimize
+  // _Interlocked* operations in the future, we will have to remove the volatile
+  // marker.
   auto *Result = CGF.Builder.CreateAtomicCmpXchg(
       Destination, Comparand, Exchange, SuccessOrdering, FailureOrdering);
   Result->setVolatile(true);
 
@@ -311,6 +315,68 @@
   return CGF.Builder.CreateExtractValue(Result, 0);
 }
 
+// 64-bit Microsoft platforms support 128-bit cmpxchg operations. They are
+// prototyped like this:
+//
+// unsigned char _InterlockedCompareExchange128...(
+//     __int64 volatile * _Destination,
+//     __int64 _ExchangeHigh,
+//     __int64 _ExchangeLow,
+//     __int64 * _ComparandResult);
+static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
+                                              const CallExpr *E,
+                                              AtomicOrdering SuccessOrdering) {
+  assert(E->getNumArgs() == 4);
+  llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
+  llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
+  llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
+  llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));
+
+  assert(Destination->getType()->isPointerTy());
+  assert(!ExchangeHigh->getType()->isPointerTy());
+  assert(!ExchangeLow->getType()->isPointerTy());
+  assert(ComparandPtr->getType()->isPointerTy());
+
+  // cmpxchg does not allow a Release failure ordering, so use Monotonic there.
+  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
+                             ? AtomicOrdering::Monotonic
+                             : SuccessOrdering;
+
+  // Convert to i128 pointers and values.
+  llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
+  llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
+  Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy);
+  Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy),
+                          CGF.getContext().toCharUnitsFromBits(128));
+
+  // Build the exchange value: (((i128)hi) << 64) | ((i128)lo).
+  ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
+  ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
+  ExchangeHigh =
+      CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
+  llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
+
+  // Load the comparand for the instruction.
+  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);
+
+  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+                                              SuccessOrdering, FailureOrdering);
+
+  // The atomic instruction is marked volatile for consistency with MSVC. This
+  // blocks the few atomics optimizations that LLVM has. If we want to optimize
+  // _Interlocked* operations in the future, we will have to remove the volatile
+  // marker.
+  CXI->setVolatile(true);
+
+  // Store the old value through the ComparandResult outparameter.
+  CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
+                          ComparandResult);
+
+  // Get the success boolean and zero extend it to i8.
+  Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
+  return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
+}
+
 static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
     AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
   assert(E->getArg(0)->getType()->isPointerType());
@@ -993,6 +1059,10 @@
   _InterlockedCompareExchange_acq,
   _InterlockedCompareExchange_rel,
   _InterlockedCompareExchange_nf,
+  _InterlockedCompareExchange128,
+  _InterlockedCompareExchange128_acq,
+  _InterlockedCompareExchange128_rel,
+  _InterlockedCompareExchange128_nf,
   _InterlockedOr_acq,
   _InterlockedOr_rel,
   _InterlockedOr_nf,
@@ -1230,6 +1300,14 @@
   case AArch64::BI_InterlockedCompareExchange_nf:
   case AArch64::BI_InterlockedCompareExchange64_nf:
     return MSVCIntrin::_InterlockedCompareExchange_nf;
+  case AArch64::BI_InterlockedCompareExchange128:
+    return MSVCIntrin::_InterlockedCompareExchange128;
+  case AArch64::BI_InterlockedCompareExchange128_acq:
+    return MSVCIntrin::_InterlockedCompareExchange128_acq;
+  case AArch64::BI_InterlockedCompareExchange128_nf:
+    return MSVCIntrin::_InterlockedCompareExchange128_nf;
+  case AArch64::BI_InterlockedCompareExchange128_rel:
+    return MSVCIntrin::_InterlockedCompareExchange128_rel;
   case AArch64::BI_InterlockedOr8_acq:
   case AArch64::BI_InterlockedOr16_acq:
   case AArch64::BI_InterlockedOr_acq:
@@ -1317,6 +1395,8 @@
     return MSVCIntrin::_BitScanReverse;
   case clang::X86::BI_InterlockedAnd64:
     return MSVCIntrin::_InterlockedAnd;
+  case clang::X86::BI_InterlockedCompareExchange128:
+    return MSVCIntrin::_InterlockedCompareExchange128;
   case clang::X86::BI_InterlockedExchange64:
     return MSVCIntrin::_InterlockedExchange;
   case clang::X86::BI_InterlockedExchangeAdd64:
@@ -1423,6 +1503,15 @@
     return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
   case MSVCIntrin::_InterlockedCompareExchange_nf:
     return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
+  case MSVCIntrin::_InterlockedCompareExchange128:
+    return EmitAtomicCmpXchg128ForMSIntrin(
+        *this, E, AtomicOrdering::SequentiallyConsistent);
+  case MSVCIntrin::_InterlockedCompareExchange128_acq:
+    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
+  case MSVCIntrin::_InterlockedCompareExchange128_rel:
+    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
+  case MSVCIntrin::_InterlockedCompareExchange128_nf:
+    return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
   case MSVCIntrin::_InterlockedOr_acq:
     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
                                  AtomicOrdering::Acquire);
@@ -14032,42 +14121,6 @@
     return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                                llvm::SyncScope::SingleThread);
   }
-  case X86::BI_InterlockedCompareExchange128: {
-    // InterlockedCompareExchange128 doesn't directly refer to 128bit ints,
-    // instead it takes pointers to 64bit ints for Destination and
-    // ComparandResult, and exchange is taken as two 64bit ints (high & low).
-    // The previous value is written to ComparandResult, and success is
-    // returned.
-
-    llvm::Type *Int128Ty = Builder.getInt128Ty();
-    llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
-
-    Value *Destination =
-        Builder.CreateBitCast(Ops[0], Int128PtrTy);
-    Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty);
-    Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty);
-    Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy),
-                            getContext().toCharUnitsFromBits(128));
-
-    Value *Exchange = Builder.CreateOr(
-        Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
-        ExchangeLow128);
-
-    Value *Comparand = Builder.CreateLoad(ComparandResult);
-
-    AtomicCmpXchgInst *CXI =
-        Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
-                                    AtomicOrdering::SequentiallyConsistent,
-                                    AtomicOrdering::SequentiallyConsistent);
-    CXI->setVolatile(true);
-
-    // Write the result back to the inout pointer.
-    Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
-
-    // Get the success boolean and zero extend it to i8.
-    Value *Success = Builder.CreateExtractValue(CXI, 1);
-    return Builder.CreateZExt(Success, ConvertType(E->getType()));
-  }
   case X86::BI_AddressOfReturnAddress: {
     Function *F =
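
The interesting step in the new helper (and in the X86-only block it replaces)
is packing the two 64-bit halves into a single i128 exchange value. As a
plain-C cross-check of what the emitted IR computes (a sketch, assuming a
64-bit Clang or GCC target where unsigned __int128 is available;
pack_exchange is a name invented here):

    #include <stdint.h>

    /* Mirrors the CreateZExt / CreateShl / CreateOr sequence: zero-extend
       both halves to 128 bits, then (((i128)hi) << 64) | ((i128)lo). */
    static unsigned __int128 pack_exchange(int64_t hi, int64_t lo) {
      unsigned __int128 h = (uint64_t)hi; /* zext i64 -> i128 */
      unsigned __int128 l = (uint64_t)lo; /* zext i64 -> i128 */
      return (h << 64) | l;
    }

Going through uint64_t first matters: casting int64_t straight to unsigned
__int128 would sign-extend, while the IR zero-extends both halves.
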
diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h
--- a/clang/lib/Headers/intrin.h
+++ b/clang/lib/Headers/intrin.h
@@ -214,10 +214,6 @@
 unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
 long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,
                                     long _Comparand);
-unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
-                                             __int64 _ExchangeHigh,
-                                             __int64 _ExchangeLow,
-                                             __int64 *_CompareandResult);
 unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,
                                                 __int64 _ExchangeHigh,
                                                 __int64 _ExchangeLow,
@@ -427,6 +423,26 @@
 __int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
                                           __int64 _Exchange, __int64 _Comparand);
 #endif
+#if defined(__x86_64__) || defined(__aarch64__)
+unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
+                                             __int64 _ExchangeHigh,
+                                             __int64 _ExchangeLow,
+                                             __int64 *_ComparandResult);
+#endif
+#if defined(__aarch64__)
+unsigned char _InterlockedCompareExchange128_acq(__int64 volatile *_Destination,
+                                                 __int64 _ExchangeHigh,
+                                                 __int64 _ExchangeLow,
+                                                 __int64 *_ComparandResult);
+unsigned char _InterlockedCompareExchange128_nf(__int64 volatile *_Destination,
+                                                __int64 _ExchangeHigh,
+                                                __int64 _ExchangeLow,
+                                                __int64 *_ComparandResult);
+unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination,
+                                                 __int64 _ExchangeHigh,
+                                                 __int64 _ExchangeLow,
+                                                 __int64 *_ComparandResult);
+#endif
 
 /*----------------------------------------------------------------------------*\
 |* movs, stos
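
For context on how callers use these declarations: the intrinsic returns 1 on
success, and on failure writes the value it observed into *_ComparandResult,
which is what makes a compare-exchange retry loop cheap. A usage sketch, not
part of the patch (store128 and Slot are hypothetical names; the codegen above
assumes 16-byte alignment for both pointers, per toCharUnitsFromBits(128)):

    #include <intrin.h>

    /* Two adjacent 64-bit slots treated as one 128-bit value; on
       little-endian Windows targets index 0 holds the low half. */
    __declspec(align(16)) static __int64 volatile Slot[2];

    static void store128(__int64 Hi, __int64 Lo) {
      __declspec(align(16)) __int64 Expected[2] = {Slot[0], Slot[1]};
      while (!_InterlockedCompareExchange128(Slot, Hi, Lo, Expected)) {
        /* Failure refreshed Expected with the current contents; retry. */
      }
    }
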
diff --git a/clang/test/CodeGen/ms-intrinsics.c b/clang/test/CodeGen/ms-intrinsics.c
--- a/clang/test/CodeGen/ms-intrinsics.c
+++ b/clang/test/CodeGen/ms-intrinsics.c
@@ -6,10 +6,10 @@
 // RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-ARM,CHECK-ARM-ARM64,CHECK-ARM-X64
 // RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
 // RUN:         -triple x86_64--windows -Oz -emit-llvm -target-feature +cx16 %s -o - \
-// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-X64,CHECK-ARM-X64,CHECK-INTEL
+// RUN:         | FileCheck %s --check-prefixes CHECK,CHECK-X64,CHECK-ARM-X64,CHECK-INTEL,CHECK-64
 // RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
 // RUN:         -triple aarch64-windows -Oz -emit-llvm %s -o - \
-// RUN:         | FileCheck %s --check-prefixes CHECK-ARM-ARM64,CHECK-ARM-X64,CHECK-ARM64
+// RUN:         | FileCheck %s --check-prefixes CHECK-ARM-ARM64,CHECK-ARM-X64,CHECK-ARM64,CHECK-64
 
 // intrin.h needs size_t, but -ffreestanding prevents us from getting it from
 // stddef.h. Work around it with this typedef.
@@ -432,32 +432,59 @@
 // CHECK: ret i64 [[RESULT]]
 // CHECK: }
 
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(__aarch64__)
 unsigned char test_InterlockedCompareExchange128(
     __int64 volatile *Destination, __int64 ExchangeHigh,
     __int64 ExchangeLow, __int64 *ComparandResult) {
   return _InterlockedCompareExchange128(++Destination, ++ExchangeHigh,
                                         ++ExchangeLow, ++ComparandResult);
 }
-// CHECK-X64: define{{.*}}i8 @test_InterlockedCompareExchange128(i64*{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%ExchangeHigh, i64{{[a-z_ ]*}}%ExchangeLow, i64*{{[a-z_ ]*}}%ComparandResult){{.*}}{
-// CHECK-X64: %incdec.ptr = getelementptr inbounds i64, i64* %Destination, i64 1
-// CHECK-X64: %inc = add nsw i64 %ExchangeHigh, 1
-// CHECK-X64: %inc1 = add nsw i64 %ExchangeLow, 1
-// CHECK-X64: %incdec.ptr2 = getelementptr inbounds i64, i64* %ComparandResult, i64 1
-// CHECK-X64: [[DST:%[0-9]+]] = bitcast i64* %incdec.ptr to i128*
-// CHECK-X64: [[EH:%[0-9]+]] = zext i64 %inc to i128
-// CHECK-X64: [[EL:%[0-9]+]] = zext i64 %inc1 to i128
-// CHECK-X64: [[CNR:%[0-9]+]] = bitcast i64* %incdec.ptr2 to i128*
-// CHECK-X64: [[EHS:%[0-9]+]] = shl nuw i128 [[EH]], 64
-// CHECK-X64: [[EXP:%[0-9]+]] = or i128 [[EHS]], [[EL]]
-// CHECK-X64: [[ORG:%[0-9]+]] = load i128, i128* [[CNR]], align 16
-// CHECK-X64: [[RES:%[0-9]+]] = cmpxchg volatile i128* [[DST]], i128 [[ORG]], i128 [[EXP]] seq_cst seq_cst
-// CHECK-X64: [[OLD:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 0
-// CHECK-X64: store i128 [[OLD]], i128* [[CNR]], align 16
-// CHECK-X64: [[SUC1:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 1
-// CHECK-X64: [[SUC8:%[0-9]+]] = zext i1 [[SUC1]] to i8
-// CHECK-X64: ret i8 [[SUC8]]
-// CHECK-X64: }
+// CHECK-64: define{{.*}}i8 @test_InterlockedCompareExchange128(i64*{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%ExchangeHigh, i64{{[a-z_ ]*}}%ExchangeLow, i64*{{[a-z_ ]*}}%ComparandResult){{.*}}{
+// CHECK-64: %incdec.ptr = getelementptr inbounds i64, i64* %Destination, i64 1
+// CHECK-64: %inc = add nsw i64 %ExchangeHigh, 1
+// CHECK-64: %inc1 = add nsw i64 %ExchangeLow, 1
+// CHECK-64: %incdec.ptr2 = getelementptr inbounds i64, i64* %ComparandResult, i64 1
+// CHECK-64: [[DST:%[0-9]+]] = bitcast i64* %incdec.ptr to i128*
+// CHECK-64: [[CNR:%[0-9]+]] = bitcast i64* %incdec.ptr2 to i128*
+// CHECK-64: [[EH:%[0-9]+]] = zext i64 %inc to i128
+// CHECK-64: [[EL:%[0-9]+]] = zext i64 %inc1 to i128
+// CHECK-64: [[EHS:%[0-9]+]] = shl nuw i128 [[EH]], 64
+// CHECK-64: [[EXP:%[0-9]+]] = or i128 [[EHS]], [[EL]]
+// CHECK-64: [[ORG:%[0-9]+]] = load i128, i128* [[CNR]], align 16
+// CHECK-64: [[RES:%[0-9]+]] = cmpxchg volatile i128* [[DST]], i128 [[ORG]], i128 [[EXP]] seq_cst seq_cst
+// CHECK-64: [[OLD:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 0
+// CHECK-64: store i128 [[OLD]], i128* [[CNR]], align 16
+// CHECK-64: [[SUC1:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 1
+// CHECK-64: [[SUC8:%[0-9]+]] = zext i1 [[SUC1]] to i8
+// CHECK-64: ret i8 [[SUC8]]
+// CHECK-64: }
+#endif
+
+#if defined(__aarch64__)
+unsigned char test_InterlockedCompareExchange128_acq(
+    __int64 volatile *Destination, __int64 ExchangeHigh,
+    __int64 ExchangeLow, __int64 *ComparandResult) {
+  return _InterlockedCompareExchange128_acq(Destination, ExchangeHigh,
+                                            ExchangeLow, ComparandResult);
+}
+unsigned char test_InterlockedCompareExchange128_nf(
+    __int64 volatile *Destination, __int64 ExchangeHigh,
+    __int64 ExchangeLow, __int64 *ComparandResult) {
+  return _InterlockedCompareExchange128_nf(Destination, ExchangeHigh,
+                                           ExchangeLow, ComparandResult);
+} +unsigned char test_InterlockedCompareExchange128_rel( + __int64 volatile *Destination, __int64 ExchangeHigh, + __int64 ExchangeLow, __int64 *ComparandResult) { + return _InterlockedCompareExchange128_rel(Destination, ExchangeHigh, + ExchangeLow, ComparandResult); +} +// CHECK-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange128_acq({{.*}}) +// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} acquire acquire +// CHECK-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange128_nf({{.*}}) +// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} monotonic monotonic +// CHECK-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange128_rel({{.*}}) +// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} release monotonic #endif short test_InterlockedIncrement16(short volatile *Addend) {
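
Taken together, the four variants differ only in the AtomicOrdering passed to
EmitAtomicCmpXchg128ForMSIntrin, which the CHECK-ARM64 lines above pin down.
An illustrative summary in caller terms (a sketch for an AArch64
MSVC-compatibility build; cas128_orderings is a name invented here):

    #include <intrin.h>

    /* Ordering pairs produced per the CHECK lines: default -> seq_cst
       seq_cst, _acq -> acquire acquire, _rel -> release monotonic (a cmpxchg
       failure ordering may not be release), _nf -> monotonic monotonic. */
    static unsigned char cas128_orderings(__int64 volatile *D, __int64 Hi,
                                          __int64 Lo, __int64 *Cmp) {
      unsigned char Ok = 0;
      Ok |= _InterlockedCompareExchange128(D, Hi, Lo, Cmp);
      Ok |= _InterlockedCompareExchange128_acq(D, Hi, Lo, Cmp);
      Ok |= _InterlockedCompareExchange128_rel(D, Hi, Lo, Cmp);
      Ok |= _InterlockedCompareExchange128_nf(D, Hi, Lo, Cmp);
      return Ok;
    }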