Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -10247,8 +10247,8 @@
 For most of these operations, the type of '<value>' must be an integer
 type whose bit width is a power of two greater than or equal to eight
 and less than or equal to a target-specific size limit. For xchg, this
-may also be a floating point type with the same size constraints as
-integers. For fadd/fsub, this must be a floating point type. The
+may also be a floating point or a pointer type with the same size constraints
+as integers. For fadd/fsub, this must be a floating point type. The
 type of the '``<pointer>``' operand must be a pointer to that type. If
 the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not
 allowed to modify the number or order of execution of this
Index: llvm/lib/AsmParser/LLParser.cpp
===================================================================
--- llvm/lib/AsmParser/LLParser.cpp
+++ llvm/lib/AsmParser/LLParser.cpp
@@ -7434,10 +7434,12 @@
 
   if (Operation == AtomicRMWInst::Xchg) {
     if (!Val->getType()->isIntegerTy() &&
-        !Val->getType()->isFloatingPointTy()) {
-      return error(ValLoc,
-                   "atomicrmw " + AtomicRMWInst::getOperationName(Operation) +
-                   " operand must be an integer or floating point type");
+        !Val->getType()->isFloatingPointTy() &&
+        !Val->getType()->isPointerTy()) {
+      return error(
+          ValLoc,
+          "atomicrmw " + AtomicRMWInst::getOperationName(Operation) +
+              " operand must be an integer, floating point, or pointer type");
     }
   } else if (IsFP) {
     if (!Val->getType()->isFloatingPointTy()) {
@@ -7453,7 +7455,9 @@
     }
   }
 
-  unsigned Size = Val->getType()->getPrimitiveSizeInBits();
+  unsigned Size =
+      PFS.getFunction().getParent()->getDataLayout().getTypeStoreSizeInBits(
+          Val->getType());
   if (Size < 8 || (Size & (Size - 1)))
     return error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
                          " integer");
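A note on the parser change above: Type::getPrimitiveSizeInBits() returns 0 for pointer types, so the size check has to go through the module's DataLayout to learn a pointer's store size. A minimal IR sketch that the updated parser accepts (hypothetical function and value names; the explicit datalayout is only there to pin the pointer width):

  target datalayout = "e-p:32:32"

  define i8* @swap_head(i8** %head, i8* %node) {
    ; i8* stores as 32 bits under this datalayout: a power of two and at
    ; least one byte, so the power-of-two size check now succeeds for the
    ; pointer operand.
    %old = atomicrmw xchg i8** %head, i8* %node seq_cst
    ret i8* %old
  }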
Index: llvm/lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -285,15 +285,6 @@
         MadeChange = true;
       } else {
         AtomicRMWInst::BinOp Op = RMWI->getOperation();
-        if (Op == AtomicRMWInst::Xchg &&
-            RMWI->getValOperand()->getType()->isFloatingPointTy()) {
-          // TODO: add a TLI hook to control this so that each target can
-          // convert to lowering the original type one at a time.
-          RMWI = convertAtomicXchgToIntegerType(RMWI);
-          assert(RMWI->getValOperand()->getType()->isIntegerTy() &&
-                 "invariant broken");
-          MadeChange = true;
-        }
         unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
         unsigned ValueSize = getAtomicOpSize(RMWI);
         if (ValueSize < MinCASSize &&
@@ -385,7 +376,9 @@
   Value *Val = RMWI->getValOperand();
   Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
   Value *NewAddr = Builder.CreateBitCast(Addr, PT);
-  Value *NewVal = Builder.CreateBitCast(Val, NewTy);
+  Value *NewVal = Val->getType()->isPointerTy()
+                      ? Builder.CreatePtrToInt(Val, NewTy)
+                      : Builder.CreateBitCast(Val, NewTy);
 
   auto *NewRMWI =
       Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
@@ -393,7 +386,9 @@
   NewRMWI->setVolatile(RMWI->isVolatile());
   LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
 
-  Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType());
+  Value *NewRVal = RMWI->getType()->isPointerTy()
+                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
+                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
   RMWI->replaceAllUsesWith(NewRVal);
   RMWI->eraseFromParent();
   return NewRMWI;
@@ -525,7 +520,7 @@
   Type *OrigTy = NewVal->getType();
 
   // This code can go away when cmpxchg supports FP types.
-  bool NeedBitcast = OrigTy->isFloatingPointTy();
+  bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isPointerTy();
   if (NeedBitcast) {
     IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
     unsigned AS = Addr->getType()->getPointerAddressSpace();
@@ -545,11 +540,20 @@
 }
 
 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+  bool MadeChange = false;
   LLVMContext &Ctx = AI->getModule()->getContext();
   TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
+  if (AI->getOperation() == AtomicRMWInst::Xchg &&
+      (AI->getValOperand()->getType()->isFloatingPointTy() ||
+       (Kind != TargetLoweringBase::AtomicExpansionKind::CmpXChg &&
+        AI->getValOperand()->getType()->isPointerTy()))) {
+    AI = convertAtomicXchgToIntegerType(AI);
+    assert(AI->getValOperand()->getType()->isIntegerTy() && "invariant broken");
+    MadeChange = true;
+  }
   switch (Kind) {
   case TargetLoweringBase::AtomicExpansionKind::None:
-    return false;
+    return MadeChange;
   case TargetLoweringBase::AtomicExpansionKind::LLSC: {
     unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
     unsigned ValueSize = getAtomicOpSize(AI);
@@ -572,7 +576,7 @@
     if (ValueSize < MinCASSize) {
       // TODO: Handle atomicrmw fadd/fsub
       if (AI->getType()->isFloatingPointTy())
-        return false;
+        return MadeChange;
 
       expandPartwordAtomicRMW(AI,
                               TargetLoweringBase::AtomicExpansionKind::CmpXChg);
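With the conversion hoisted into tryExpandAtomicRMW, any pointer-typed xchg that is not headed for the plain CmpXChg expansion is first rewritten by convertAtomicXchgToIntegerType, which uses ptrtoint/inttoptr where the floating-point path uses bitcasts. A sketch of the shape it produces, assuming 64-bit pointers (the value names are illustrative, not the pass's actual temporaries):

  define i8* @xchg_ptr_as_int(i8** %ptr, i8* %val) {
    ; The address is reinterpreted as a pointer to the matching integer type.
    %addr.cast = bitcast i8** %ptr to i64*
    ; The pointer operand is converted to an integer...
    %val.int = ptrtoint i8* %val to i64
    ; ...the swap itself happens as an ordinary integer xchg...
    %old.int = atomicrmw xchg i64* %addr.cast, i64 %val.int seq_cst
    ; ...and the old value is converted back for the original users.
    %old = inttoptr i64 %old.int to i8*
    ret i8* %old
  }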
Index: llvm/lib/IR/Verifier.cpp
===================================================================
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -3925,7 +3925,8 @@
   auto Op = RMWI.getOperation();
   Type *ElTy = RMWI.getOperand(1)->getType();
   if (Op == AtomicRMWInst::Xchg) {
-    Check(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(),
+    Check(ElTy->isIntegerTy() || ElTy->isFloatingPointTy() ||
+              ElTy->isPointerTy(),
           "atomicrmw " + AtomicRMWInst::getOperationName(Op) +
-              " operand must have integer or floating point type!",
+              " operand must have integer, floating point, or pointer type!",
           &RMWI, ElTy);
Index: llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-fp-or-pointer-type.ll
===================================================================
--- /dev/null
+++ llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-fp-or-pointer-type.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: error: atomicrmw xchg operand must be an integer, floating point, or pointer type
+define void @f(<3 x i1>* %ptr) {
+  atomicrmw xchg <3 x i1>* %ptr, <3 x i1> zeroinitializer seq_cst
+  ret void
+}
Index: llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll
===================================================================
--- llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
-
-; CHECK: error: atomicrmw xchg operand must be an integer or floating point type
-define void @f(i32** %ptr) {
-  atomicrmw xchg i32** %ptr, i32* null seq_cst
-  ret void
-}
Index: llvm/test/Bitcode/compatibility.ll
===================================================================
--- llvm/test/Bitcode/compatibility.ll
+++ llvm/test/Bitcode/compatibility.ll
@@ -854,6 +854,12 @@
   ret void
 }
 
+define void @pointer_atomics(i8** %word) {
+; CHECK: %atomicrmw.xchg = atomicrmw xchg i8** %word, i8* null monotonic
+  %atomicrmw.xchg = atomicrmw xchg i8** %word, i8* null monotonic
+  ret void
+}
+
 ;; Fast Math Flags
 define void @fastmathflags_unop(float %op1) {
   %f.nnan = fneg nnan float %op1
Index: llvm/test/CodeGen/AArch64/atomicrmw-xchg-pointer.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/atomicrmw-xchg-pointer.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE
+
+define i8* @test_rmw_xchg_pointer(i8** %dst, i8* %new) {
+; NOLSE-LABEL: test_rmw_xchg_pointer:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    mov x8, x0
+; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; NOLSE-NEXT:    ldaxr x0, [x8]
+; NOLSE-NEXT:    stlxr w9, x1, [x8]
+; NOLSE-NEXT:    cbnz w9, .LBB0_1
+; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_rmw_xchg_pointer:
+; LSE:       // %bb.0:
+; LSE-NEXT:    swpal x1, x0, [x0]
+; LSE-NEXT:    ret
+  %res = atomicrmw xchg i8** %dst, i8* %new seq_cst
+  ret i8* %res
+}
Index: llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
+++ llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -659,6 +659,15 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}atomic_xchg_pointer_offset:
+; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @atomic_xchg_pointer_offset(i8** %out, i8* %in) {
+entry:
+  %gep = getelementptr i8*, i8** %out, i32 4
+  %val = atomicrmw volatile xchg i8** %gep, i8* %in seq_cst
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
 ; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
Index: llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
+++ llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
@@ -794,6 +794,17 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}atomic_xchg_pointer_offset:
+; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+
+; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
+define amdgpu_kernel void @atomic_xchg_pointer_offset(i8* addrspace(1)* %out, i8* %in) {
+entry:
+  %gep = getelementptr i8*, i8* addrspace(1)* %out, i64 4
+  %tmp0 = atomicrmw volatile xchg i8* addrspace(1)* %gep, i8* %in seq_cst
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
 ; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; CIVI: buffer_store_dwordx2 [[RET]]
Index: llvm/test/CodeGen/AMDGPU/local-atomics64.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/local-atomics64.ll
+++ llvm/test/CodeGen/AMDGPU/local-atomics64.ll
@@ -40,6 +40,19 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}lds_atomic_xchg_ret_pointer_offset:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_xchg_ret_pointer_offset(i8* addrspace(1)* %out, i8* addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i8*, i8* addrspace(3)* %ptr, i32 4
+  %result = atomicrmw xchg i8* addrspace(3)* %gep, i8* null seq_cst
+  store i8* %result, i8* addrspace(1)* %out, align 8
+  ret void
+}
+
 ; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
 ; SICIVI: s_mov_b32 m0
 ; GFX9-NOT: m0
Index: llvm/test/CodeGen/X86/atomic64.ll
===================================================================
--- llvm/test/CodeGen/X86/atomic64.ll
+++ llvm/test/CodeGen/X86/atomic64.ll
@@ -4,6 +4,7 @@
 
 @sc64 = external dso_local global i64
 @fsc64 = external dso_local global double
+@psc64 = external dso_local global i8*
 
 define void @atomic_fetch_add64() nounwind {
 ; X64-LABEL: atomic_fetch_add64:
@@ -780,3 +781,18 @@
   %t1 = atomicrmw xchg double* @fsc64, double %x acquire
   ret void
 }
+
+define void @atomic_fetch_swapptr(i8* %x) nounwind {
+; X64-LABEL: atomic_fetch_swapptr:
+; X64:       # %bb.0:
+; X64-NEXT:    xchgq %rdi, psc64(%rip)
+; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_swapptr:
+; I486:       # %bb.0:
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT:    xchgl %eax, psc64
+; I486-NEXT:    retl
+  %t1 = atomicrmw xchg i8** @psc64, i8* %x acquire
+  ret void
+}
Index: llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-pointer.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-pointer.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -O1 -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s
+; RUN: opt -O1 -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
+
+define void @atomic_swap_pointer(i8** %ptr, i8* %val) nounwind {
+; CHECK-LABEL: @atomic_swap_pointer(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[PTR:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint i8* [[VAL:%.*]] to i64
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i64(i64* elementtype(i64) [[TMP1]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.aarch64.stxr.p0i64(i64 [[TMP2]], i64* elementtype(i64) [[TMP1]])
+; CHECK-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to i8*
+; CHECK-NEXT:    ret void
+;
+; OUTLINE-ATOMICS-LABEL: @atomic_swap_pointer(
+; OUTLINE-ATOMICS-NEXT:    [[TMP1:%.*]] = bitcast i8** [[PTR:%.*]] to i64*
+; OUTLINE-ATOMICS-NEXT:    [[TMP2:%.*]] = ptrtoint i8* [[VAL:%.*]] to i64
+; OUTLINE-ATOMICS-NEXT:    [[TMP3:%.*]] = atomicrmw xchg i64* [[TMP1]], i64 [[TMP2]] acquire, align 8
+; OUTLINE-ATOMICS-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to i8*
+; OUTLINE-ATOMICS-NEXT:    ret void
+;
+  %t1 = atomicrmw xchg i8** %ptr, i8* %val acquire
+  ret void
+}
Index: llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-pointer.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-pointer.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=i686-linux-gnu -atomic-expand %s | FileCheck %s
+
+define i8* @atomic_xchg_pointer(i8** %ptr) nounwind {
+; CHECK-LABEL: @atomic_xchg_pointer(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[PTR:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw xchg i64* [[TMP1]], i64 0 seq_cst, align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i8*
+; CHECK-NEXT:    ret i8* [[TMP3]]
+;
+  %result = atomicrmw xchg i8** %ptr, i8* null seq_cst
+  ret i8* %result
+}
+
+define i8* @atomic_xchg_pointer_as1(i8* addrspace(1)* %ptr) nounwind {
+; CHECK-LABEL: @atomic_xchg_pointer_as1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* addrspace(1)* [[PTR:%.*]] to i64 addrspace(1)*
+; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw xchg i64 addrspace(1)* [[TMP1]], i64 0 seq_cst, align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i8*
+; CHECK-NEXT:    ret i8* [[TMP3]]
+;
+  %result = atomicrmw xchg i8* addrspace(1)* %ptr, i8* null seq_cst
+  ret i8* %result
+}
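Taken together, a frontend that previously had to launder an atomic pointer swap through integers can now emit the swap directly; the integer round trip seen in the expansion tests above becomes an internal detail of AtomicExpand on targets that need it. A before/after sketch assuming 64-bit pointers (hypothetical function names):

  ; Old shape: swap by hand through an integer.
  define i8* @swap_via_int(i8** %p, i8* %v) {
    %p.int = bitcast i8** %p to i64*
    %v.int = ptrtoint i8* %v to i64
    %old.int = atomicrmw xchg i64* %p.int, i64 %v.int seq_cst
    %old = inttoptr i64 %old.int to i8*
    ret i8* %old
  }

  ; New shape: the swap keeps its pointer type.
  define i8* @swap_direct(i8** %p, i8* %v) {
    %old = atomicrmw xchg i8** %p, i8* %v seq_cst
    ret i8* %old
  }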