Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -8431,13 +8431,14 @@
 - umax
 - umin
 
-The type of '<value>' must be an integer type whose bit width is a power
-of two greater than or equal to eight and less than or equal to a
-target-specific size limit. The type of the '``<pointer>``' operand must
-be a pointer to that type. If the ``atomicrmw`` is marked as
-``volatile``, then the optimizer is not allowed to modify the number or
-order of execution of this ``atomicrmw`` with other :ref:`volatile
-operations <volatile>`.
+For most of these operations, the type of '<value>' must be an integer
+type whose bit width is a power of two greater than or equal to eight
+and less than or equal to a target-specific size limit. For xchg, this
+may also be a floating point type with the same size constraints as
+integers. The type of the '``<pointer>``' operand must be a pointer to
+that type. If the ``atomicrmw`` is marked as ``volatile``, then the
+optimizer is not allowed to modify the number or order of execution of
+this ``atomicrmw`` with other :ref:`volatile operations <volatile>`.
 
 A ``atomicrmw`` instruction can also take an optional ":ref:`syncscope
 <syncscope>`" argument.
Index: lib/AsmParser/LLParser.cpp
===================================================================
--- lib/AsmParser/LLParser.cpp
+++ lib/AsmParser/LLParser.cpp
@@ -6850,12 +6850,20 @@
   if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
     return Error(ValLoc, "atomicrmw value and pointer type do not match");
 
-  if (!Val->getType()->isIntegerTy()) {
+  if (Operation != AtomicRMWInst::Xchg && !Val->getType()->isIntegerTy()) {
     return Error(ValLoc, "atomicrmw " +
                  AtomicRMWInst::getOperationName(Operation) +
                  " operand must be an integer");
   }
 
+  if (Operation == AtomicRMWInst::Xchg &&
+      !Val->getType()->isIntegerTy() &&
+      !Val->getType()->isFloatingPointTy()) {
+    return Error(ValLoc, "atomicrmw " +
+                 AtomicRMWInst::getOperationName(Operation) +
+                 " operand must be an integer or floating point type");
+  }
+
   unsigned Size = Val->getType()->getPrimitiveSizeInBits();
   if (Size < 8 || (Size & (Size - 1)))
     return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
Index: lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- lib/CodeGen/AtomicExpandPass.cpp
+++ lib/CodeGen/AtomicExpandPass.cpp
@@ -495,11 +495,25 @@
                                  Value *Loaded, Value *NewVal,
                                  AtomicOrdering MemOpOrder,
                                  Value *&Success, Value *&NewLoaded) {
+  Type *OrigTy = NewVal->getType();
+
+  bool NeedBitcast = OrigTy->isFloatingPointTy();
+  if (NeedBitcast) {
+    IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
+    unsigned AS = Addr->getType()->getPointerAddressSpace();
+    Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
+    NewVal = Builder.CreateBitCast(NewVal, IntTy);
+    Loaded = Builder.CreateBitCast(Loaded, IntTy);
+  }
+
   Value* Pair = Builder.CreateAtomicCmpXchg(
       Addr, Loaded, NewVal, MemOpOrder,
       AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
   Success = Builder.CreateExtractValue(Pair, 1, "success");
   NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+
+  if (NeedBitcast)
+    NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
 }
 
 /// Emit IR to implement the given atomicrmw operation on values in registers,
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4551,6 +4551,24 @@
     Results.push_back(CvtVec);
     break;
   }
+  case ISD::ATOMIC_SWAP: {
+    AtomicSDNode *AM = cast<AtomicSDNode>(Node);
+    SDLoc SL(Node);
+    SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal());
+    assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
+           "unexpected promotion type");
+    assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
+           "unexpected atomic_swap with illegal type");
+
+    SDValue NewAtomic
+      = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT,
+                      DAG.getVTList(NVT, MVT::Other),
+                      { AM->getChain(), AM->getBasePtr(), CastVal },
+                      AM->getMemOperand());
+    Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
+    Results.push_back(NewAtomic.getValue(1));
+    break;
+  }
   }
 
   // Replace the original node with the legalized result.
Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -104,6 +104,7 @@
     case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
     case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
     case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
+    case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
     case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
     case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
     case ISD::SINT_TO_FP:
@@ -1932,7 +1933,7 @@
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
    case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
-
+   case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
   }
 
   if (R.getNode())
@@ -2166,3 +2167,29 @@
                                              N->getValueType(0)));
 }
 
+SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+
+  AtomicSDNode *AM = cast<AtomicSDNode>(N);
+  SDLoc SL(N);
+
+  SDValue CastVal = BitConvertToInteger(AM->getVal());
+  EVT CastVT = CastVal.getValueType();
+
+  SDValue NewAtomic
+    = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, CastVT,
+                    DAG.getVTList(CastVT, MVT::Other),
+                    { AM->getChain(), AM->getBasePtr(), CastVal },
+                    AM->getMemOperand());
+
+  SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
+                                   NewAtomic);
+  // Legalize the chain result by replacing uses of the old value chain with
+  // the new one.
+  ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1));
+
+  return ResultCast;
+
+}
+
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -620,6 +620,7 @@
   SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
   SDValue PromoteFloatRes_UnaryOp(SDNode *N);
   SDValue PromoteFloatRes_UNDEF(SDNode *N);
+  SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N);
   SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
 
   bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -583,6 +583,14 @@
   std::fill(std::begin(TargetDAGCombineArray),
             std::end(TargetDAGCombineArray), 0);
 
+  for (MVT VT : MVT::fp_valuetypes()) {
+    MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
+    if (IntVT.isValid()) {
+      setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
+      AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
+    }
+  }
+
   // Set default actions for various operations.
   for (MVT VT : MVT::all_valuetypes()) {
     // Default all indexed load / store to expand.
Index: lib/IR/Verifier.cpp
===================================================================
--- lib/IR/Verifier.cpp
+++ lib/IR/Verifier.cpp
@@ -3398,10 +3398,17 @@
   PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
   Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
   Type *ElTy = PTy->getElementType();
-  Assert(ElTy->isIntegerTy(), "atomicrmw " +
-         AtomicRMWInst::getOperationName(Op) +
-         " operand must have integer type!",
-         &RMWI, ElTy);
+  if (Op == AtomicRMWInst::Xchg) {
+    Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(), "atomicrmw " +
+           AtomicRMWInst::getOperationName(Op) +
+           " operand must have integer or floating point type!",
+           &RMWI, ElTy);
+  } else {
+    Assert(ElTy->isIntegerTy(), "atomicrmw " +
+           AtomicRMWInst::getOperationName(Op) +
+           " operand must have integer type!",
+           &RMWI, ElTy);
+  }
   checkAtomicMemAccessSize(ElTy, &RMWI);
   Assert(ElTy == RMWI.getOperand(1)->getType(),
          "Argument value type does not match pointer operand type!", &RMWI,
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11554,9 +11554,13 @@
       IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
   Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
 
-  return Builder.CreateTruncOrBitCast(
-      Builder.CreateCall(Ldxr, Addr),
-      cast<PointerType>(Addr->getType())->getElementType());
+  Type *EltTy = cast<PointerType>(Addr->getType())->getElementType();
+
+  const DataLayout &DL = M->getDataLayout();
+  IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(EltTy));
+  Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
+
+  return Builder.CreateBitCast(Trunc, EltTy);
 }
 
 void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
@@ -11591,6 +11595,10 @@
   Type *Tys[] = { Addr->getType() };
   Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
 
+  const DataLayout &DL = M->getDataLayout();
+  IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
+  Val = Builder.CreateBitCast(Val, IntValTy);
+
   return Builder.CreateCall(Stxr,
                             {Builder.CreateZExtOrBitCast(
                                  Val, Stxr->getFunctionType()->getParamType(0)),
Index: test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll
===================================================================
--- /dev/null
+++ test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: error: atomicrmw xchg operand must be an integer or floating point type
+define void @f(i32** %ptr) {
+  atomicrmw xchg i32** %ptr, i32* null seq_cst
+  ret void
+}
Index: test/Bitcode/compatibility.ll
===================================================================
--- test/Bitcode/compatibility.ll
+++ test/Bitcode/compatibility.ll
@@ -761,6 +761,12 @@
   ret void
 }
 
+define void @fp_atomics(float* %word) {
+; CHECK: %atomicrmw.xchg = atomicrmw xchg float* %word, float 1.000000e+00 monotonic
+  %atomicrmw.xchg = atomicrmw xchg float* %word, float 1.0 monotonic
+  ret void
+}
+
 ;; Fast Math Flags
 define void @fastmathflags_unop(float %op1) {
   %f.nnan = fneg nnan float %op1
Index: test/CodeGen/AMDGPU/flat_atomics.ll
===================================================================
--- test/CodeGen/AMDGPU/flat_atomics.ll
+++ test/CodeGen/AMDGPU/flat_atomics.ll
@@ -703,6 +703,16 @@
   ret void
 }
 
+;
GCN-LABEL: {{^}}atomic_xchg_f32_offset: +; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} +define amdgpu_kernel void @atomic_xchg_f32_offset(float* %out, float %in) { +entry: + %gep = getelementptr float, float* %out, i32 4 + %val = atomicrmw volatile xchg float* %gep, float %in seq_cst + ret void +} + ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset: ; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} Index: test/CodeGen/AMDGPU/flat_atomics_i64.ll =================================================================== --- test/CodeGen/AMDGPU/flat_atomics_i64.ll +++ test/CodeGen/AMDGPU/flat_atomics_i64.ll @@ -650,6 +650,15 @@ ret void } +; GCN-LABEL: {{^}}atomic_xchg_f64_offset: +; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @atomic_xchg_f64_offset(double* %out, double %in) { +entry: + %gep = getelementptr double, double* %out, i64 4 + %tmp0 = atomicrmw volatile xchg double* %gep, double %in seq_cst + ret void +} + ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset: ; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] Index: test/CodeGen/AMDGPU/global_atomics.ll =================================================================== --- test/CodeGen/AMDGPU/global_atomics.ll +++ test/CodeGen/AMDGPU/global_atomics.ll @@ -839,6 +839,17 @@ ret void } +; GCN-LABEL: {{^}}atomic_xchg_f32_offset: +; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} + +; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}} +define amdgpu_kernel void @atomic_xchg_f32_offset(float addrspace(1)* %out, float %in) { +entry: + %gep = getelementptr float, float addrspace(1)* %out, i64 4 + %val = atomicrmw volatile xchg float addrspace(1)* %gep, float %in seq_cst + ret void +} + ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset: ; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; SIVI: buffer_store_dword [[RET]] Index: test/CodeGen/AMDGPU/global_atomics_i64.ll =================================================================== --- test/CodeGen/AMDGPU/global_atomics_i64.ll +++ test/CodeGen/AMDGPU/global_atomics_i64.ll @@ -783,6 +783,17 @@ ret void } +; GCN-LABEL: {{^}}atomic_xchg_f64_offset: +; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} + +; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}} +define amdgpu_kernel void @atomic_xchg_f64_offset(double addrspace(1)* %out, double %in) { +entry: + %gep = getelementptr double, double addrspace(1)* %out, i64 4 + %tmp0 = atomicrmw volatile xchg double addrspace(1)* %gep, double %in seq_cst + ret void +} + ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset: ; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; CIVI: buffer_store_dwordx2 [[RET]] Index: test/CodeGen/AMDGPU/local-atomics.ll =================================================================== --- test/CodeGen/AMDGPU/local-atomics.ll +++ test/CodeGen/AMDGPU/local-atomics.ll @@ -36,6 +36,20 @@ ret void } +; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_f32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; EG: LDS_WRXCHG_RET 
* +; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_xchg_ret_f32_offset(float addrspace(1)* %out, float addrspace(3)* %ptr) nounwind { + %gep = getelementptr float, float addrspace(3)* %ptr, i32 4 + %result = atomicrmw xchg float addrspace(3)* %gep, float 4.0 seq_cst + store float %result, float addrspace(1)* %out, align 4 + ret void +} + ; XXX - Is it really necessary to load 4 into VGPR? ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32: ; EG: LDS_ADD_RET * Index: test/CodeGen/AMDGPU/local-atomics64.ll =================================================================== --- test/CodeGen/AMDGPU/local-atomics64.ll +++ test/CodeGen/AMDGPU/local-atomics64.ll @@ -27,6 +27,19 @@ ret void } +; GCN-LABEL: {{^}}lds_atomic_xchg_ret_f64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32 +; GCN: s_endpgm +define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(double addrspace(1)* %out, double addrspace(3)* %ptr) nounwind { + %gep = getelementptr double, double addrspace(3)* %ptr, i32 4 + %result = atomicrmw xchg double addrspace(3)* %gep, double 4.0 seq_cst + store double %result, double addrspace(1)* %out, align 8 + ret void +} + ; GCN-LABEL: {{^}}lds_atomic_add_ret_i64: ; SICIVI: s_mov_b32 m0 ; GFX9-NOT: m0 Index: test/CodeGen/X86/atomic128.ll =================================================================== --- test/CodeGen/X86/atomic128.ll +++ test/CodeGen/X86/atomic128.ll @@ -360,3 +360,27 @@ store atomic i128 %in, i128* %p unordered, align 16 ret void } + + +@fsc128 = external global fp128 + +define void @atomic_fetch_swapf128(fp128 %x) nounwind { +; CHECK-LABEL: atomic_fetch_swapf128: +; CHECK: ## %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movq _fsc128@{{.*}}(%rip), %rsi +; CHECK-NEXT: movq (%rsi), %rax +; CHECK-NEXT: movq 8(%rsi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB14_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lock cmpxchg16b (%rsi) +; CHECK-NEXT: jne LBB14_1 +; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %t1 = atomicrmw xchg fp128* @fsc128, fp128 %x acquire + ret void +} Index: test/CodeGen/X86/atomic16.ll =================================================================== --- test/CodeGen/X86/atomic16.ll +++ test/CodeGen/X86/atomic16.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32 @sc16 = external global i16 +@fsc16 = external global half define void @atomic_fetch_add16() nounwind { ; X64-LABEL: atomic_fetch_add16 @@ -273,3 +274,14 @@ ; X64: ret ; X32: ret } + +define void @atomic_fetch_swapf16(half %x) nounwind { + %t1 = atomicrmw xchg half* @fsc16, half %x acquire +; X64-NOT: lock +; X64: xchgw +; X32-NOT: lock +; X32: xchgw + ret void +; X64: ret +; X32: ret +} Index: test/CodeGen/X86/atomic32.ll =================================================================== --- test/CodeGen/X86/atomic32.ll +++ test/CodeGen/X86/atomic32.ll @@ -4,6 +4,7 @@ ; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -mattr=-cmov,-sse -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOCMOV @sc32 = external global i32 +@fsc32 = external global float define void @atomic_fetch_add32() nounwind { ; X64-LABEL: atomic_fetch_add32: @@ -708,3 +709,35 @@ %t1 = atomicrmw xchg i32* @sc32, i32 %x acquire ret void } + 
+define void @atomic_fetch_swapf32(float %x) nounwind { +; X64-LABEL: atomic_fetch_swapf32: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: xchgl %eax, {{.*}}(%rip) +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: retq +; +; X86-CMOV-LABEL: atomic_fetch_swapf32: +; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %eax +; X86-CMOV-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-CMOV-NEXT: movd %xmm0, %eax +; X86-CMOV-NEXT: xchgl %eax, fsc32 +; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: popl %eax +; X86-CMOV-NEXT: retl +; +; X86-NOCMOV-LABEL: atomic_fetch_swapf32: +; X86-NOCMOV: # %bb.0: +; X86-NOCMOV-NEXT: subl $8, %esp +; X86-NOCMOV-NEXT: flds {{[0-9]+}}(%esp) +; X86-NOCMOV-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOCMOV-NEXT: xchgl %eax, fsc32 +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NOCMOV-NEXT: addl $8, %esp +; X86-NOCMOV-NEXT: retl + %t1 = atomicrmw xchg float* @fsc32, float %x acquire + ret void +} Index: test/CodeGen/X86/atomic64.ll =================================================================== --- test/CodeGen/X86/atomic64.ll +++ test/CodeGen/X86/atomic64.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -O0 -mtriple=x86_64-- -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64 @sc64 = external global i64 +@fsc64 = external global double define void @atomic_fetch_add64() nounwind { ; X64-LABEL: atomic_fetch_add64: @@ -233,3 +234,16 @@ ; X64: ret ; X32: ret } + +define void @atomic_fetch_swapf64(double %x) nounwind { +; X64-LABEL: atomic_fetch_swapf64: +; X32-LABEL: atomic_fetch_swapf64: + %t1 = atomicrmw xchg double* @fsc64, double %x acquire +; X64-NOT: lock +; X64: xchgq +; X32: lock +; X32: xchg8b + ret void +; X64: ret +; X32: ret +} Index: test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll =================================================================== --- /dev/null +++ test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s + +define void @atomic_swap_f16(half* %ptr, half %val) nounwind { +; CHECK-LABEL: @atomic_swap_f16( +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f16(half* [[PTR:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half +; CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[VAL:%.*]] to i16 +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f16(i64 [[TMP5]], half* [[PTR]]) +; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret void +; + %t1 = atomicrmw xchg half* %ptr, half %val acquire + ret void +} + +define void @atomic_swap_f32(float* %ptr, float %val) nounwind { +; CHECK-LABEL: @atomic_swap_f32( +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f32(float* [[PTR:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[VAL:%.*]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 +; CHECK-NEXT: 
[[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f32(i64 [[TMP5]], float* [[PTR]]) +; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret void +; + %t1 = atomicrmw xchg float* %ptr, float %val acquire + ret void +} + +define void @atomic_swap_f64(double* %ptr, double %val) nounwind { +; CHECK-LABEL: @atomic_swap_f64( +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f64(double* [[PTR:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[VAL:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.aarch64.stxr.p0f64(i64 [[TMP3]], double* [[PTR]]) +; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret void +; + %t1 = atomicrmw xchg double* %ptr, double %val acquire + ret void +} Index: test/Transforms/AtomicExpand/AArch64/lit.local.cfg =================================================================== --- /dev/null +++ test/Transforms/AtomicExpand/AArch64/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True + Index: test/Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll =================================================================== --- /dev/null +++ test/Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=i686-linux-gnu -atomic-expand %s | FileCheck %s + +define double @atomic_xchg_f64(double* %ptr) nounwind { +; CHECK-LABEL: @atomic_xchg_f64( +; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[PTR]] to i64* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP3]], i64 4616189618054758400 seq_cst seq_cst +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP5]] +; + %result = atomicrmw xchg double* %ptr, double 4.0 seq_cst + ret double %result +} + +define double @atomic_xchg_f64_as1(double addrspace(1)* %ptr) nounwind { +; CHECK-LABEL: @atomic_xchg_f64_as1( +; CHECK-NEXT: [[TMP1:%.*]] = load double, double addrspace(1)* [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double addrspace(1)* [[PTR]] to i64 addrspace(1)* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP3]], i64 4616189618054758400 seq_cst seq_cst +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { 
i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP5]] +; + %result = atomicrmw xchg double addrspace(1)* %ptr, double 4.0 seq_cst + ret double %result +}
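
For context, here is a minimal C++ sketch (not part of the patch) of how a frontend could emit the newly accepted floating point exchange through IRBuilder once this change is in tree. The module and function names (fp_xchg_demo, swap_f32) are hypothetical; CreateAtomicRMW, AtomicRMWInst::Xchg, and verifyFunction are the standard LLVM APIs exercised by the parser and verifier changes above.

// Sketch: build "%old = atomicrmw xchg float* %ptr, float %val seq_cst"
// programmatically. Illustrative only; links against LLVMCore.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("fp_xchg_demo", Ctx);

  // define void @swap_f32(float* %ptr, float %val)
  FunctionType *FTy =
      FunctionType::get(Type::getVoidTy(Ctx),
                        {Type::getFloatPtrTy(Ctx), Type::getFloatTy(Ctx)},
                        /*isVarArg=*/false);
  Function *F =
      Function::Create(FTy, Function::ExternalLinkage, "swap_f32", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> Builder(BB);

  Function::arg_iterator Args = F->arg_begin();
  Value *Ptr = &*Args++;
  Value *Val = &*Args;

  // %old = atomicrmw xchg float* %ptr, float %val seq_cst
  // The verifier accepts a floating point value operand for xchg only with
  // this patch applied; the other atomicrmw operations still require integers.
  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Ptr, Val,
                          AtomicOrdering::SequentiallyConsistent);
  Builder.CreateRetVoid();

  // verifyFunction returns true on broken IR, so the module is printed only
  // when the new form verifies cleanly.
  if (!verifyFunction(*F, &errs()))
    M.print(outs(), nullptr);
  return 0;
}

The textual form of the same instruction is exercised directly by the new test/Bitcode/compatibility.ll function and the AtomicExpand tests above, so llvm-as and opt accept it without any frontend changes.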