diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -374,8 +374,7 @@
   llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);
 
   llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
-      Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder,
-      Scope);
+      Ptr, Expected, Desired, SuccessOrder, FailureOrder, Scope);
   Pair->setVolatile(E->isVolatile());
   Pair->setWeak(IsWeak);
 
@@ -676,7 +675,7 @@
   llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
 
   llvm::AtomicRMWInst *RMWI =
-      CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope);
+      CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order, Scope);
   RMWI->setVolatile(E->isVolatile());
 
   // For __atomic_*_fetch operations, perform the operation again to
@@ -1679,8 +1678,7 @@
     llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak) {
   // Do the atomic store.
   Address Addr = getAtomicAddressAsAtomicIntPointer();
-  auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr.getPointer(),
-                                               ExpectedVal, DesiredVal,
+  auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr, ExpectedVal, DesiredVal,
                                                Success, Failure);
   // Other decoration.
   Inst->setVolatile(LVal.isVolatileQualified());
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -132,25 +132,22 @@
     return CreateAlignedStore(getInt1(Value), Addr, CharUnits::One());
   }
 
-  // Temporarily use old signature; clang will be updated to an Address overload
-  // in a subsequent patch.
   llvm::AtomicCmpXchgInst *
-  CreateAtomicCmpXchg(llvm::Value *Ptr, llvm::Value *Cmp, llvm::Value *New,
+  CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New,
                       llvm::AtomicOrdering SuccessOrdering,
                       llvm::AtomicOrdering FailureOrdering,
                       llvm::SyncScope::ID SSID = llvm::SyncScope::System) {
     return CGBuilderBaseTy::CreateAtomicCmpXchg(
-        Ptr, Cmp, New, llvm::MaybeAlign(), SuccessOrdering, FailureOrdering,
-        SSID);
+        Addr.getPointer(), Cmp, New, Addr.getAlignment().getAsAlign(),
+        SuccessOrdering, FailureOrdering, SSID);
   }
 
-  // Temporarily use old signature; clang will be updated to an Address overload
-  // in a subsequent patch.
   llvm::AtomicRMWInst *
-  CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, llvm::Value *Ptr,
-                  llvm::Value *Val, llvm::AtomicOrdering Ordering,
+  CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val,
+                  llvm::AtomicOrdering Ordering,
                   llvm::SyncScope::ID SSID = llvm::SyncScope::System) {
-    return CGBuilderBaseTy::CreateAtomicRMW(Op, Ptr, Val, llvm::MaybeAlign(),
+    return CGBuilderBaseTy::CreateAtomicRMW(Op, Addr.getPointer(), Val,
+                                            Addr.getAlignment().getAsAlign(),
                                             Ordering, SSID);
   }
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -133,33 +133,44 @@
   return V;
 }
 
+// This helper function returns a new Address based on Addr, with its
+// alignment raised to at least the store size of the element type.
+static Address ForceNaturalAlignment(CodeGenFunction &CGF, Address Addr) {
+  CharUnits TypeSize = CharUnits::fromQuantity(
+      CGF.CGM.getDataLayout().getTypeStoreSize(Addr.getElementType()));
+  if (Addr.getAlignment() < TypeSize)
+    return Address(Addr.getPointer(), TypeSize);
+  return Addr;
+}
+
 /// Utility to insert an atomic instruction based on Intrinsic::ID
 /// and the expression node.
 static Value *MakeBinaryAtomicValue(
     CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
-    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
+    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
+    bool RequireNaturalAlignment = false) {
   QualType T = E->getType();
   assert(E->getArg(0)->getType()->isPointerType());
   assert(CGF.getContext().hasSameUnqualifiedType(
       T, E->getArg(0)->getType()->getPointeeType()));
   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
 
-  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
-  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
+  Address DestAddr = CGF.EmitPointerWithAlignment(E->getArg(0));
 
   llvm::IntegerType *IntType = llvm::IntegerType::get(
       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
-  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
 
-  llvm::Value *Args[2];
-  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
-  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
-  llvm::Type *ValueType = Args[1]->getType();
-  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
+  DestAddr = CGF.Builder.CreateElementBitCast(DestAddr, IntType);
+  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
+  llvm::Type *ValueType = Val->getType();
+  Val = EmitToInt(CGF, Val, T, IntType);
 
-  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
-      Kind, Args[0], Args[1], Ordering);
+  if (RequireNaturalAlignment)
+    DestAddr = ForceNaturalAlignment(CGF, DestAddr);
+
+  llvm::Value *Result =
+      CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
   return EmitFromInt(CGF, Result, T, ValueType);
 }
@@ -205,23 +216,20 @@
                                   E->getArg(0)->getType()->getPointeeType()));
   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
 
-  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
-  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
+  Address DestAddr = CGF.EmitPointerWithAlignment(E->getArg(0));
 
   llvm::IntegerType *IntType = llvm::IntegerType::get(
       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
-  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
 
-  llvm::Value *Args[2];
-  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
-  llvm::Type *ValueType = Args[1]->getType();
-  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
-  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
+  DestAddr = CGF.Builder.CreateElementBitCast(DestAddr, IntType);
+  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
+  llvm::Type *ValueType = Val->getType();
+  Val = EmitToInt(CGF, Val, T, IntType);
 
   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
-      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
-  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
+      Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
+  Result = CGF.Builder.CreateBinOp(Op, Result, Val);
   if (Invert)
     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
@@ -247,22 +255,19 @@
 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                      bool ReturnBool) {
   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
-  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
-  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
+  Address DestAddr = CGF.EmitPointerWithAlignment(E->getArg(0));
 
   llvm::IntegerType *IntType = llvm::IntegerType::get(
       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
-  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
+  DestAddr = CGF.Builder.CreateElementBitCast(DestAddr, IntType);
 
-  Value *Args[3];
-  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
-  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
-  llvm::Type *ValueType = Args[1]->getType();
-  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
-  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
+  Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
+  llvm::Type *ValueType = Cmp->getType();
+  Cmp = EmitToInt(CGF, Cmp, T, IntType);
+  Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
 
   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
-      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
+      DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
       llvm::AtomicOrdering::SequentiallyConsistent);
 
   if (ReturnBool)
     // Extract boolean success flag and zext it to int.
@@ -286,7 +291,9 @@
 /// CreateAtomicCmpXchg. That is the reason we could not use the above utility
 /// function MakeAtomicCmpXchgValue since it expects the arguments to be
 /// already swapped.
-
+///
+/// Note that the MSVC 'Interlocked' intrinsics always assume the destination
+/// address is aligned to the operation's size, i.e. naturally aligned.
 static Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF,
                                            const CallExpr *E,
     AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
@@ -298,7 +305,9 @@
   assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
                                                  E->getArg(2)->getType()));
 
-  auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
+  Address DestAddr = CGF.EmitPointerWithAlignment(E->getArg(0));
+  DestAddr = ForceNaturalAlignment(CGF, DestAddr);
+
   auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
   auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
@@ -312,8 +321,7 @@
   // _Interlocked* operations in the future, we will have to remove the volatile
   // marker.
   auto *Result = CGF.Builder.CreateAtomicCmpXchg(
-      Destination, Comparand, Exchange,
-      SuccessOrdering, FailureOrdering);
+      DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
   Result->setVolatile(true);
   return CGF.Builder.CreateExtractValue(Result, 0);
 }
@@ -326,19 +334,23 @@
 //   __int64 _ExchangeHigh,
 //   __int64 _ExchangeLow,
 //   __int64 * _ComparandResult);
+//
+// Note that Destination is assumed to be at least 16-byte aligned, despite
+// being typed as __int64.
+
 static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
                                               const CallExpr *E,
                                               AtomicOrdering SuccessOrdering) {
   assert(E->getNumArgs() == 4);
-  llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
+  Address DestAddr = CGF.EmitPointerWithAlignment(E->getArg(0));
   llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
   llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
-  llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));
+  Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
 
-  assert(Destination->getType()->isPointerTy());
+  assert(DestAddr.getType()->isPointerTy());
   assert(!ExchangeHigh->getType()->isPointerTy());
   assert(!ExchangeLow->getType()->isPointerTy());
-  assert(ComparandPtr->getType()->isPointerTy());
+  assert(ComparandAddr.getType()->isPointerTy());
 
   // For Release ordering, the failure ordering should be Monotonic.
   auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
@@ -347,10 +359,11 @@
 
   // Convert to i128 pointers and values.
   llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
-  llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
-  Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy);
-  Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy),
-                          CGF.getContext().toCharUnitsFromBits(128));
+  DestAddr = CGF.Builder.CreateElementBitCast(DestAddr, Int128Ty);
+  ComparandAddr = CGF.Builder.CreateElementBitCast(ComparandAddr, Int128Ty);
+
+  // Force natural alignment after the cast to i128 (i.e. 16-byte alignment).
+  DestAddr = ForceNaturalAlignment(CGF, DestAddr);
 
   // (((i128)hi) << 64) | ((i128)lo)
   ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
@@ -360,9 +373,9 @@
   llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
 
   // Load the comparand for the instruction.
-  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);
+  llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
 
-  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+  auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
                                               SuccessOrdering, FailureOrdering);
 
   // The atomic instruction is marked volatile for consistency with MSVC. This
@@ -373,36 +386,40 @@
 
   // Store the result as an outparameter.
   CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
-                          ComparandResult);
+                          ComparandAddr);
 
   // Get the success boolean and zero extend it to i8.
   Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
   return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
 }
 
-static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
-    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
+static Value *EmitAtomicIncrementValue(
+    CodeGenFunction &CGF, const CallExpr *E,
+    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
+    bool RequireNaturalAlignment = false) {
   assert(E->getArg(0)->getType()->isPointerType());
 
   auto *IntTy = CGF.ConvertType(E->getType());
+  Address DestAddr = CGF.EmitPointerWithAlignment(E->getArg(0));
+  if (RequireNaturalAlignment)
+    DestAddr = ForceNaturalAlignment(CGF, DestAddr);
   auto *Result = CGF.Builder.CreateAtomicRMW(
-      AtomicRMWInst::Add,
-      CGF.EmitScalarExpr(E->getArg(0)),
-      ConstantInt::get(IntTy, 1),
-      Ordering);
+      AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
   return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
 }
 
-static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
-    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
+static Value *EmitAtomicDecrementValue(
+    CodeGenFunction &CGF, const CallExpr *E,
+    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
+    bool RequireNaturalAlignment = false) {
   assert(E->getArg(0)->getType()->isPointerType());
 
   auto *IntTy = CGF.ConvertType(E->getType());
+  Address DestAddr = CGF.EmitPointerWithAlignment(E->getArg(0));
+  if (RequireNaturalAlignment)
+    DestAddr = ForceNaturalAlignment(CGF, DestAddr);
   auto *Result = CGF.Builder.CreateAtomicRMW(
-      AtomicRMWInst::Sub,
-      CGF.EmitScalarExpr(E->getArg(0)),
-      ConstantInt::get(IntTy, 1),
-      Ordering);
+      AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
   return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
 }
@@ -962,8 +979,7 @@
       Mask = CGF.Builder.CreateNot(Mask);
       RMWOp = llvm::AtomicRMWInst::And;
     }
-    OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
-                                          Ordering);
+    OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
   } else {
     // Emit a plain load for the non-interlocked intrinsics.
     OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
@@ -1475,35 +1491,53 @@
     return Result;
   }
   case MSVCIntrin::_InterlockedAnd:
-    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
+    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
+                                 AtomicOrdering::SequentiallyConsistent,
+                                 /*RequireNaturalAlignment=*/true);
   case MSVCIntrin::_InterlockedExchange:
-    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
+    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
+                                 AtomicOrdering::SequentiallyConsistent,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedExchangeAdd:
-    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
+    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
+                                 AtomicOrdering::SequentiallyConsistent,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedExchangeSub:
-    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
+    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E,
+                                 AtomicOrdering::SequentiallyConsistent,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedOr:
-    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
+    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
+                                 AtomicOrdering::SequentiallyConsistent,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedXor:
-    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
+    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
+                                 AtomicOrdering::SequentiallyConsistent,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedExchangeAdd_acq:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
-                                 AtomicOrdering::Acquire);
+                                 AtomicOrdering::Acquire,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedExchangeAdd_rel:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
-                                 AtomicOrdering::Release);
+                                 AtomicOrdering::Release,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedExchangeAdd_nf:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
-                                 AtomicOrdering::Monotonic);
+                                 AtomicOrdering::Monotonic,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedExchange_acq:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
-                                 AtomicOrdering::Acquire);
+                                 AtomicOrdering::Acquire,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedExchange_rel:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
-                                 AtomicOrdering::Release);
+                                 AtomicOrdering::Release,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedExchange_nf:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
-                                 AtomicOrdering::Monotonic);
+                                 AtomicOrdering::Monotonic,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedCompareExchange_acq:
    return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
  case MSVCIntrin::_InterlockedCompareExchange_rel:
@@ -1521,48 +1555,67 @@
     return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
  case MSVCIntrin::_InterlockedOr_acq:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
-                                 AtomicOrdering::Acquire);
+                                 AtomicOrdering::Acquire,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedOr_rel:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
-                                 AtomicOrdering::Release);
+                                 AtomicOrdering::Release,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedOr_nf:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
-                                 AtomicOrdering::Monotonic);
+                                 AtomicOrdering::Monotonic,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedXor_acq:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
-                                 AtomicOrdering::Acquire);
+                                 AtomicOrdering::Acquire,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedXor_rel:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
-                                 AtomicOrdering::Release);
+                                 AtomicOrdering::Release,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedXor_nf:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
-                                 AtomicOrdering::Monotonic);
+                                 AtomicOrdering::Monotonic,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedAnd_acq:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
-                                 AtomicOrdering::Acquire);
+                                 AtomicOrdering::Acquire,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedAnd_rel:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
-                                 AtomicOrdering::Release);
+                                 AtomicOrdering::Release,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedAnd_nf:
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
-                                 AtomicOrdering::Monotonic);
+                                 AtomicOrdering::Monotonic,
+                                 /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedIncrement_acq:
-    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
+    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire,
+                                    /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedIncrement_rel:
-    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
+    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release,
+                                    /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedIncrement_nf:
-    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
+    return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic,
+                                    /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedDecrement_acq:
-    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
+    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire,
+                                    /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedDecrement_rel:
-    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
+    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release,
+                                    /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedDecrement_nf:
-    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
+    return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic,
+                                    /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedDecrement:
-    return EmitAtomicDecrementValue(*this, E);
+    return EmitAtomicDecrementValue(*this, E,
+                                    AtomicOrdering::SequentiallyConsistent,
+                                    /*RequireNaturalAlignment=*/true);
  case MSVCIntrin::_InterlockedIncrement:
-    return EmitAtomicIncrementValue(*this, E);
+    return EmitAtomicIncrementValue(*this, E,
+                                    AtomicOrdering::SequentiallyConsistent,
+                                    /*RequireNaturalAlignment=*/true);
 
  case MSVCIntrin::__fastfail: {
    // Request immediate process termination from the kernel. The instruction
@@ -3762,9 +3815,8 @@
     bool Volatile =
         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
 
-    Value *Ptr = EmitScalarExpr(E->getArg(0));
-    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
-    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
+    Address PtrAddr = EmitPointerWithAlignment(E->getArg(0));
+    PtrAddr = Builder.CreateElementBitCast(PtrAddr, Int8Ty);
     Value *NewVal = Builder.getInt8(1);
     Value *Order = EmitScalarExpr(E->getArg(1));
     if (isa<llvm::ConstantInt>(Order)) {
@@ -3773,26 +3825,28 @@
       switch (ord) {
       case 0: // memory_order_relaxed
       default: // invalid order
-        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
-                                         llvm::AtomicOrdering::Monotonic);
+        Result =
+            Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, PtrAddr, NewVal,
+                                    llvm::AtomicOrdering::Monotonic);
         break;
       case 1: // memory_order_consume
       case 2: // memory_order_acquire
-        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
-                                         llvm::AtomicOrdering::Acquire);
+        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, PtrAddr,
+                                         NewVal, llvm::AtomicOrdering::Acquire);
         break;
       case 3: // memory_order_release
-        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
-                                         llvm::AtomicOrdering::Release);
+        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, PtrAddr,
+                                         NewVal, llvm::AtomicOrdering::Release);
         break;
       case 4: // memory_order_acq_rel
-        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
-                                         llvm::AtomicOrdering::AcquireRelease);
+        Result =
+            Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, PtrAddr, NewVal,
+                                    llvm::AtomicOrdering::AcquireRelease);
         break;
       case 5: // memory_order_seq_cst
         Result = Builder.CreateAtomicRMW(
-            llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
+            llvm::AtomicRMWInst::Xchg, PtrAddr, NewVal,
             llvm::AtomicOrdering::SequentiallyConsistent);
         break;
       }
@@ -3823,7 +3877,7 @@
     for (unsigned i = 0; i < 5; ++i) {
       Builder.SetInsertPoint(BBs[i]);
       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
-                                                   Ptr, NewVal, Orders[i]);
+                                                   PtrAddr, NewVal, Orders[i]);
       RMW->setVolatile(Volatile);
       Result->addIncoming(RMW, BBs[i]);
       Builder.CreateBr(ContBB);
@@ -4279,10 +4333,10 @@
     llvm::IntegerType *IntType = IntegerType::get(
        getLLVMContext(), getContext().getTypeSize(E->getType()));
-    llvm::Type *IntPtrType = IntType->getPointerTo();
 
-    llvm::Value *Destination =
-        Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
+    Address DestAddr = Builder.CreateElementBitCast(
+        EmitPointerWithAlignment(E->getArg(0)), IntType);
+    DestAddr = ForceNaturalAlignment(*this, DestAddr);
 
     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
     RTy = Exchange->getType();
@@ -4295,7 +4349,7 @@
        BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
        AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
 
-    auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+    auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
                                               Ordering, Ordering);
     Result->setVolatile(true);
@@ -10227,12 +10281,14 @@
   }
 
   case AArch64::BI_InterlockedAdd: {
-    Value *Arg0 = EmitScalarExpr(E->getArg(0));
-    Value *Arg1 = EmitScalarExpr(E->getArg(1));
-    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
-        AtomicRMWInst::Add, Arg0, Arg1,
-        llvm::AtomicOrdering::SequentiallyConsistent);
-    return Builder.CreateAdd(RMWI, Arg1);
+    Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
+    DestAddr = ForceNaturalAlignment(*this, DestAddr);
+    Value *Val = EmitScalarExpr(E->getArg(1));
+
+    AtomicRMWInst *RMWI =
+        Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
+                                llvm::AtomicOrdering::SequentiallyConsistent);
+    return Builder.CreateAdd(RMWI, Val);
   }
   }
@@ -16324,9 +16380,10 @@
 
   case NVPTX::BI__nvvm_atom_add_gen_f:
   case NVPTX::BI__nvvm_atom_add_gen_d: {
-    Value *Ptr = EmitScalarExpr(E->getArg(0));
+    Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
     Value *Val = EmitScalarExpr(E->getArg(1));
-    return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val,
+
+    return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
                                    AtomicOrdering::SequentiallyConsistent);
   }
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2430,7 +2430,7 @@
       // For atomic bool increment, we just store true and return it for
       // preincrement, do an atomic swap with true for postincrement
       return Builder.CreateAtomicRMW(
-          llvm::AtomicRMWInst::Xchg, LV.getPointer(CGF), True,
+          llvm::AtomicRMWInst::Xchg, LV.getAddress(CGF), True,
          llvm::AtomicOrdering::SequentiallyConsistent);
     }
     // Special case for atomic increment / decrement on integers, emit
@@ -2448,7 +2448,7 @@
       llvm::Value *amt = CGF.EmitToMemory(
          llvm::ConstantInt::get(ConvertType(type), 1, true), type);
       llvm::Value *old =
-          Builder.CreateAtomicRMW(aop, LV.getPointer(CGF), amt,
+          Builder.CreateAtomicRMW(aop, LV.getAddress(CGF), amt,
                                  llvm::AtomicOrdering::SequentiallyConsistent);
       return isPre ? Builder.CreateBinOp(op, old, amt) : old;
     }
@@ -3020,7 +3020,7 @@
                                                E->getExprLoc()),
                              LHSTy);
       Value *OldVal = Builder.CreateAtomicRMW(
-          AtomicOp, LHSLV.getPointer(CGF), Amt,
+          AtomicOp, LHSLV.getAddress(CGF), Amt,
          llvm::AtomicOrdering::SequentiallyConsistent);
 
       // Since operation is atomic, the result type is guaranteed to be the
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5150,7 +5150,7 @@
           X.getType()->hasSignedIntegerRepresentation());
   }
   llvm::Value *Res =
-      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
+      CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(CGF), UpdateVal, AO);
   return std::make_pair(true, RValue::get(Res));
 }
 
diff --git a/clang/test/CodeGen/atomic-ops.c b/clang/test/CodeGen/atomic-ops.c
--- a/clang/test/CodeGen/atomic-ops.c
+++ b/clang/test/CodeGen/atomic-ops.c
@@ -655,9 +655,9 @@
   __atomic_load(&aligned_a, &aligned_b, memory_order_seq_cst);
   // CHECK: store atomic i64 {{.*}}, align 16
   __atomic_store(&aligned_a, &aligned_b, memory_order_seq_cst);
-  // CHECK: atomicrmw xchg i64* {{.*}}, align 8
+  // CHECK: atomicrmw xchg i64* {{.*}}, align 16
   __atomic_exchange(&aligned_a, &aligned_b, &aligned_c, memory_order_seq_cst);
-  // CHECK: cmpxchg weak i64* {{.*}}, align 8
+  // CHECK: cmpxchg weak i64* {{.*}}, align 16
   __atomic_compare_exchange(&aligned_a, &aligned_b, &aligned_c, 1,
                             memory_order_seq_cst, memory_order_seq_cst);
 }
diff --git a/clang/test/CodeGen/ms-intrinsics-underaligned.c b/clang/test/CodeGen/ms-intrinsics-underaligned.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/ms-intrinsics-underaligned.c
@@ -0,0 +1,99 @@
+// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
+// RUN:   -triple x86_64--windows -Oz -emit-llvm -target-feature +cx16 %s -o - \
+// RUN:   | FileCheck %s
+
+// Ensure that we emit _Interlocked atomic operations specifying natural
+// alignment, even when clang's usual alignment derivation would result in a
+// lower alignment value.
+
+// intrin.h needs size_t, but -ffreestanding prevents us from getting it from
+// stddef.h. Work around it with this typedef.
+typedef __SIZE_TYPE__ size_t;
+
+#include <intrin.h>
+
+#pragma pack(1)
+typedef struct {
+  char a;
+  short b;
+  long c;
+  long long d;
+  void *p;
+} X;
+
+_Static_assert(sizeof(X) == 23, "");
+_Static_assert(__alignof__(X) == 1, "");
+
+// CHECK-LABEL: @test_InterlockedExchangePointer(
+// CHECK: atomicrmw {{.*}} align 8
+void *test_InterlockedExchangePointer(X *x) {
+  return _InterlockedExchangePointer(&x->p, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedExchange8(
+// CHECK: atomicrmw {{.*}} align 1
+char test_InterlockedExchange8(X *x) {
+  return _InterlockedExchange8(&x->a, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedExchange16(
+// CHECK: atomicrmw {{.*}} align 2
+short test_InterlockedExchange16(X *x) {
+  return _InterlockedExchange16(&x->b, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedExchange(
+// CHECK: atomicrmw {{.*}} align 4
+long test_InterlockedExchange(X *x) {
+  return _InterlockedExchange(&x->c, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedExchange64(
+// CHECK: atomicrmw {{.*}} align 8
+long long test_InterlockedExchange64(X *x) {
+  return _InterlockedExchange64(&x->d, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedIncrement(
+// CHECK: atomicrmw {{.*}} align 4
+long test_InterlockedIncrement(X *x) {
+  return _InterlockedIncrement(&x->c);
+}
+
+// CHECK-LABEL: @test_InterlockedDecrement16(
+// CHECK: atomicrmw {{.*}} align 2
+short test_InterlockedDecrement16(X *x) {
+  return _InterlockedDecrement16(&x->b);
+}
+
+
+// CHECK-LABEL: @test_InterlockedCompareExchangePointer(
+// CHECK: cmpxchg {{.*}} align 8
+void *test_InterlockedCompareExchangePointer(X *x) {
+  return _InterlockedCompareExchangePointer(&x->p, 0, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedCompareExchange8(
+// CHECK: cmpxchg {{.*}} align 1
+char test_InterlockedCompareExchange8(X *x) {
+  return _InterlockedCompareExchange8(&x->a, 0, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedCompareExchange16(
+// CHECK: cmpxchg {{.*}} align 2
+short test_InterlockedCompareExchange16(X *x) {
+  return _InterlockedCompareExchange16(&x->b, 0, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedCompareExchange(
+// CHECK: cmpxchg {{.*}} align 4
+long test_InterlockedCompareExchange(X *x) {
+  return _InterlockedCompareExchange(&x->c, 0, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedCompareExchange64(
+// CHECK: cmpxchg {{.*}} align 8
+long long test_InterlockedCompareExchange64(X *x) {
+  return _InterlockedCompareExchange64(&x->d, 0, 0);
+}
+
diff --git a/clang/test/CodeGen/ms-intrinsics.c b/clang/test/CodeGen/ms-intrinsics.c
--- a/clang/test/CodeGen/ms-intrinsics.c
+++ b/clang/test/CodeGen/ms-intrinsics.c
@@ -450,10 +450,10 @@
 // CHECK-64: [[EL:%[0-9]+]] = zext i64 %inc1 to i128
 // CHECK-64: [[EHS:%[0-9]+]] = shl nuw i128 [[EH]], 64
 // CHECK-64: [[EXP:%[0-9]+]] = or i128 [[EHS]], [[EL]]
-// CHECK-64: [[ORG:%[0-9]+]] = load i128, i128* [[CNR]], align 16
+// CHECK-64: [[ORG:%[0-9]+]] = load i128, i128* [[CNR]], align 8
 // CHECK-64: [[RES:%[0-9]+]] = cmpxchg volatile i128* [[DST]], i128 [[ORG]], i128 [[EXP]] seq_cst seq_cst, align 16
 // CHECK-64: [[OLD:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 0
-// CHECK-64: store i128 [[OLD]], i128* [[CNR]], align 16
+// CHECK-64: store i128 [[OLD]], i128* [[CNR]], align 8
 // CHECK-64: [[SUC1:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 1
 // CHECK-64: [[SUC8:%[0-9]+]] = zext i1 [[SUC1]] to i8
 // CHECK-64: ret i8 [[SUC8]]
diff --git a/clang/test/OpenMP/parallel_reduction_codegen.cpp b/clang/test/OpenMP/parallel_reduction_codegen.cpp
--- a/clang/test/OpenMP/parallel_reduction_codegen.cpp
+++ b/clang/test/OpenMP/parallel_reduction_codegen.cpp
@@ -198,7 +198,7 @@
   // LAMBDA: br label %[[REDUCTION_DONE]]
   // LAMBDA: [[CASE2]]
   // LAMBDA: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
-  // LAMBDA: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic, align 4
+  // LAMBDA: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic, align 128
   // LAMBDA: br label %[[REDUCTION_DONE]]
   // LAMBDA: [[REDUCTION_DONE]]
   // LAMBDA: ret void
@@ -255,7 +255,7 @@
   // BLOCKS: br label %[[REDUCTION_DONE]]
   // BLOCKS: [[CASE2]]
   // BLOCKS: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
-  // BLOCKS: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic, align 4
+  // BLOCKS: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic, align 128
   // BLOCKS: br label %[[REDUCTION_DONE]]
   // BLOCKS: [[REDUCTION_DONE]]
   // BLOCKS: ret void
@@ -771,7 +771,7 @@
 // case 2:
 // t_var += t_var_reduction;
 // CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]]
-// CHECK: atomicrmw add i32* [[T_VAR_REF]], i32 [[T_VAR_PRIV_VAL]] monotonic, align 4
+// CHECK: atomicrmw add i32* [[T_VAR_REF]], i32 [[T_VAR_PRIV_VAL]] monotonic, align 128
 
 // var = var.operator &(var_reduction);
 // CHECK: call void @__kmpc_critical(
@@ -801,7 +801,7 @@
 
 // t_var1 = min(t_var1, t_var1_reduction);
 // CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_PRIV]]
-// CHECK: atomicrmw min i32* [[T_VAR1_REF]], i32 [[T_VAR1_PRIV_VAL]] monotonic, align 4
+// CHECK: atomicrmw min i32* [[T_VAR1_REF]], i32 [[T_VAR1_PRIV_VAL]] monotonic, align 128
 
 // break;
 // CHECK: br label %[[RED_DONE]]
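
Illustration (not part of the patch): the under-alignment case that ForceNaturalAlignment and the new ms-intrinsics-underaligned.c test target reduces to a few lines of C. This is a minimal sketch assuming an x86_64 Windows target with -fms-extensions; the function name "bump" is invented for the example.

#include <intrin.h>

#pragma pack(1)
struct S {
  char pad;     /* pushes 'counter' to byte offset 1 */
  long counter; /* clang can only prove 1-byte alignment for &s->counter */
};

long bump(struct S *s) {
  /* MSVC's _Interlocked* contract assumes naturally aligned operands, so the
     lowering forces natural alignment and emits
         atomicrmw add i32* %p, i32 1 seq_cst, align 4
     rather than an align-1 atomic, which LLVM would have to expand into a
     library call. */
  return _InterlockedIncrement(&s->counter);
}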