diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -78,6 +78,7 @@
   StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
   bool expandAtomicStore(StoreInst *SI);
   bool tryExpandAtomicRMW(AtomicRMWInst *AI);
+  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
   Value *
   insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
                     Align AddrAlign, AtomicOrdering MemOpOrder,
@@ -281,9 +282,18 @@
       if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
         MadeChange = true;
       } else {
+        AtomicRMWInst::BinOp Op = RMWI->getOperation();
+        if (Op == AtomicRMWInst::Xchg &&
+            RMWI->getValOperand()->getType()->isFloatingPointTy()) {
+          // TODO: add a TLI hook to control this so that each target can
+          // convert to lowering the original type one at a time.
+          RMWI = convertAtomicXchgToIntegerType(RMWI);
+          assert(RMWI->getValOperand()->getType()->isIntegerTy() &&
+                 "invariant broken");
+          MadeChange = true;
+        }
         unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
         unsigned ValueSize = getAtomicOpSize(RMWI);
-        AtomicRMWInst::BinOp Op = RMWI->getOperation();
         if (ValueSize < MinCASSize &&
             (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
              Op == AtomicRMWInst::And)) {
@@ -363,6 +373,32 @@
   return NewLI;
 }
 
+AtomicRMWInst *
+AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
+  auto *M = RMWI->getModule();
+  Type *NewTy =
+      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
+
+  IRBuilder<> Builder(RMWI);
+
+  Value *Addr = RMWI->getPointerOperand();
+  Value *Val = RMWI->getValOperand();
+  Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
+  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+  Value *NewVal = Builder.CreateBitCast(Val, NewTy);
+
+  auto *NewRMWI =
+      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
+                              RMWI->getAlign(), RMWI->getOrdering());
+  NewRMWI->setVolatile(RMWI->isVolatile());
+  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
+
+  Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType());
+  RMWI->replaceAllUsesWith(NewRVal);
+  RMWI->eraseFromParent();
+  return NewRMWI;
+}
+
 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
   switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
   case TargetLoweringBase::AtomicExpansionKind::None:
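In IR terms, the new convertAtomicXchgToIntegerType rewrite turns a floating-point xchg into the same-width integer xchg, bitcasting the pointer and value operands before the operation and the result after it. A minimal hand-written sketch of the before/after IR (illustrative only, with made-up value names; the OUTLINE-ATOMICS checks in the AArch64 expand test below show the same shape emitted by the pass):

  ; before the pass
  %res = atomicrmw xchg float* %ptr, float %val seq_cst

  ; after conversion to the corresponding integer type
  %iptr = bitcast float* %ptr to i32*
  %ival = bitcast float %val to i32
  %iold = atomicrmw xchg i32* %iptr, i32 %ival seq_cst, align 4
  %res = bitcast i32 %iold to float

Targets then only have to lower the integer form, which the tests below exercise for f16, f32, f64, and fp128.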
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE
+
+define half @test_rmw_xchg_f16(half* %dst, half %new) {
+; NOLSE-LABEL: test_rmw_xchg_f16:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
+; NOLSE-NEXT:    fmov w8, s0
+; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; NOLSE-NEXT:    ldaxrh w9, [x0]
+; NOLSE-NEXT:    stlxrh w10, w8, [x0]
+; NOLSE-NEXT:    cbnz w10, .LBB0_1
+; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    fmov s0, w9
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_rmw_xchg_f16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
+; LSE-NEXT:    fmov w8, s0
+; LSE-NEXT:    swpalh w8, w8, [x0]
+; LSE-NEXT:    fmov s0, w8
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; LSE-NEXT:    ret
+  %res = atomicrmw xchg half* %dst, half %new seq_cst
+  ret half %res
+}
+
+define float @test_rmw_xchg_f32(float* %dst, float %new) {
+; NOLSE-LABEL: test_rmw_xchg_f32:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    fmov w9, s0
+; NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; NOLSE-NEXT:    ldaxr w8, [x0]
+; NOLSE-NEXT:    stlxr w10, w9, [x0]
+; NOLSE-NEXT:    cbnz w10, .LBB1_1
+; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    fmov s0, w8
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_rmw_xchg_f32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    fmov w8, s0
+; LSE-NEXT:    swpal w8, w8, [x0]
+; LSE-NEXT:    fmov s0, w8
+; LSE-NEXT:    ret
+  %res = atomicrmw xchg float* %dst, float %new seq_cst
+  ret float %res
+}
+
+define double @test_rmw_xchg_f64(double* %dst, double %new) {
+; NOLSE-LABEL: test_rmw_xchg_f64:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    fmov x8, d0
+; NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; NOLSE-NEXT:    ldaxr x9, [x0]
+; NOLSE-NEXT:    stlxr w10, x8, [x0]
+; NOLSE-NEXT:    cbnz w10, .LBB2_1
+; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    fmov d0, x9
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_rmw_xchg_f64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    fmov x8, d0
+; LSE-NEXT:    swpal x8, x8, [x0]
+; LSE-NEXT:    fmov d0, x8
+; LSE-NEXT:    ret
+  %res = atomicrmw xchg double* %dst, double %new seq_cst
+  ret double %res
+}
+
+define fp128 @test_rmw_xchg_f128(fp128* %dst, fp128 %new) {
+; NOLSE-LABEL: test_rmw_xchg_f128:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    sub sp, sp, #32 // =32
+; NOLSE-NEXT:    .cfi_def_cfa_offset 32
+; NOLSE-NEXT:    str q0, [sp, #16]
+; NOLSE-NEXT:    ldp x9, x8, [sp, #16]
+; NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; NOLSE-NEXT:    ldaxp x11, x10, [x0]
+; NOLSE-NEXT:    stlxp w12, x9, x8, [x0]
+; NOLSE-NEXT:    cbnz w12, .LBB3_1
+; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    stp x11, x10, [sp]
+; NOLSE-NEXT:    ldr q0, [sp], #32
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_rmw_xchg_f128:
+; LSE:       // %bb.0:
+; LSE-NEXT:    sub sp, sp, #32 // =32
+; LSE-NEXT:    .cfi_def_cfa_offset 32
+; LSE-NEXT:    str q0, [sp, #16]
+; LSE-NEXT:    ldp x9, x8, [sp, #16]
+; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    ldaxp x11, x10, [x0]
+; LSE-NEXT:    stlxp w12, x9, x8, [x0]
+; LSE-NEXT:    cbnz w12, .LBB3_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    stp x11, x10, [sp]
+; LSE-NEXT:    ldr q0, [sp], #32
+; LSE-NEXT:    ret
+  %res = atomicrmw xchg fp128* %dst, fp128 %new seq_cst
+  ret fp128 %res
+}
diff --git a/llvm/test/CodeGen/X86/atomicf128.ll b/llvm/test/CodeGen/X86/atomicf128.ll
--- a/llvm/test/CodeGen/X86/atomicf128.ll
+++ b/llvm/test/CodeGen/X86/atomicf128.ll
@@ -10,21 +10,16 @@
 ; CHECK-LABEL: atomic_fetch_swapf128:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
+; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    movq _fsc128@{{.*}}(%rip), %rsi
-; CHECK-NEXT:    movaps (%rsi), %xmm1
+; CHECK-NEXT:    movq (%rsi), %rax
+; CHECK-NEXT:    movq 8(%rsi), %rdx
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_1: ## %atomicrmw.start
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
-; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
 ; CHECK-NEXT:    lock cmpxchg16b (%rsi)
-; CHECK-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
 ; CHECK-NEXT:    jne LBB0_1
 ; CHECK-NEXT:    ## %bb.2: ## %atomicrmw.end
 ; CHECK-NEXT:    popq %rbx
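The tighter x86 loop above falls out of the same IR-level rewrite: the fp128 operand is bitcast to i128 once, up front, so the per-iteration movaps/movq round-trips through the stack disappear and only the lock cmpxchg16b remains inside the loop. By analogy with the double tests at the end of this patch, the expanded IR should look roughly like the following (a hand-written sketch under that assumption, with made-up value names, not an autogenerated check):

  %1 = bitcast fp128* %p to i128*
  %2 = bitcast fp128 %new to i128
  %3 = load i128, i128* %1
  br label %atomicrmw.start
  atomicrmw.start:
  %loaded = phi i128 [ %3, %0 ], [ %newloaded, %atomicrmw.start ]
  %pair = cmpxchg i128* %1, i128 %loaded, i128 %2 seq_cst seq_cst
  %success = extractvalue { i128, i1 } %pair, 1
  %newloaded = extractvalue { i128, i1 } %pair, 0
  br i1 %success, label %atomicrmw.end, label %atomicrmw.start
  atomicrmw.end:
  %res = bitcast i128 %newloaded to fp128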
diff --git a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
--- a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
+++ b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
@@ -4,21 +4,25 @@
 
 define void @atomic_swap_f16(half* %ptr, half %val) nounwind {
 ; CHECK-LABEL: @atomic_swap_f16(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[PTR:%.*]] to i16*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast half [[VAL:%.*]] to i16
 ; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
 ; CHECK:       atomicrmw.start:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f16(half* [[PTR:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast half [[VAL:%.*]] to i16
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f16(i64 [[TMP5]], half* [[PTR]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i16(i16* [[TMP1]])
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i16
+; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0i16(i64 [[TMP5]], i16* [[TMP1]])
 ; CHECK-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
 ; CHECK-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
 ; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[TMP4]] to half
 ; CHECK-NEXT:    ret void
 ;
 ; OUTLINE-ATOMICS-LABEL: @atomic_swap_f16(
-; OUTLINE-ATOMICS-NEXT:    [[T1:%.*]] = atomicrmw xchg half* [[PTR:%.*]], half [[VAL:%.*]] acquire
+; OUTLINE-ATOMICS-NEXT:    [[TMP1:%.*]] = bitcast half* [[PTR:%.*]] to i16*
+; OUTLINE-ATOMICS-NEXT:    [[TMP2:%.*]] = bitcast half [[VAL:%.*]] to i16
+; OUTLINE-ATOMICS-NEXT:    [[TMP3:%.*]] = atomicrmw xchg i16* [[TMP1]], i16 [[TMP2]] acquire, align 2
+; OUTLINE-ATOMICS-NEXT:    [[TMP4:%.*]] = bitcast i16 [[TMP3]] to half
 ; OUTLINE-ATOMICS-NEXT:    ret void
 ;
   %t1 = atomicrmw xchg half* %ptr, half %val acquire
@@ -27,21 +31,25 @@
 
 define void @atomic_swap_f32(float* %ptr, float %val) nounwind {
 ; CHECK-LABEL: @atomic_swap_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i32*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float [[VAL:%.*]] to i32
 ; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
 ; CHECK:       atomicrmw.start:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f32(float* [[PTR:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[VAL:%.*]] to i32
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f32(i64 [[TMP5]], float* [[PTR]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i32(i32* [[TMP1]])
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0i32(i64 [[TMP5]], i32* [[TMP1]])
 ; CHECK-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
 ; CHECK-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
 ; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32 [[TMP4]] to float
 ; CHECK-NEXT:    ret void
 ;
 ; OUTLINE-ATOMICS-LABEL: @atomic_swap_f32(
-; OUTLINE-ATOMICS-NEXT:    [[T1:%.*]] = atomicrmw xchg float* [[PTR:%.*]], float [[VAL:%.*]] acquire
+; OUTLINE-ATOMICS-NEXT:    [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i32*
+; OUTLINE-ATOMICS-NEXT:    [[TMP2:%.*]] = bitcast float [[VAL:%.*]] to i32
+; OUTLINE-ATOMICS-NEXT:    [[TMP3:%.*]] = atomicrmw xchg i32* [[TMP1]], i32 [[TMP2]] acquire, align 4
+; OUTLINE-ATOMICS-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
 ; OUTLINE-ATOMICS-NEXT:    ret void
 ;
   %t1 = atomicrmw xchg float* %ptr, float %val acquire
@@ -50,19 +58,23 @@
 
 define void @atomic_swap_f64(double* %ptr, double %val) nounwind {
 ; CHECK-LABEL: @atomic_swap_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[PTR:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double [[VAL:%.*]] to i64
 ; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
 ; CHECK:       atomicrmw.start:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f64(double* [[PTR:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double [[VAL:%.*]] to i64
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.aarch64.stxr.p0f64(i64 [[TMP3]], double* [[PTR]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i64(i64* [[TMP1]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.aarch64.stxr.p0i64(i64 [[TMP2]], i64* [[TMP1]])
 ; CHECK-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP4]], 0
 ; CHECK-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
 ; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64 [[TMP3]] to double
 ; CHECK-NEXT:    ret void
 ;
 ; OUTLINE-ATOMICS-LABEL: @atomic_swap_f64(
-; OUTLINE-ATOMICS-NEXT:    [[T1:%.*]] = atomicrmw xchg double* [[PTR:%.*]], double [[VAL:%.*]] acquire
+; OUTLINE-ATOMICS-NEXT:    [[TMP1:%.*]] = bitcast double* [[PTR:%.*]] to i64*
+; OUTLINE-ATOMICS-NEXT:    [[TMP2:%.*]] = bitcast double [[VAL:%.*]] to i64
+; OUTLINE-ATOMICS-NEXT:    [[TMP3:%.*]] = atomicrmw xchg i64* [[TMP1]], i64 [[TMP2]] acquire, align 8
+; OUTLINE-ATOMICS-NEXT:    [[TMP4:%.*]] = bitcast i64 [[TMP3]] to double
 ; OUTLINE-ATOMICS-NEXT:    ret void
 ;
   %t1 = atomicrmw xchg double* %ptr, double %val acquire
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll
--- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll
+++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll
@@ -3,19 +3,18 @@
 
 define double @atomic_xchg_f64(double* %ptr) nounwind {
 ; CHECK-LABEL: @atomic_xchg_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[PTR:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8
 ; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
 ; CHECK:       atomicrmw.start:
-; CHECK-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[PTR]] to i64*
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; CHECK-NEXT:    [[TMP4:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP3]], i64 4616189618054758400 seq_cst seq_cst
-; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; CHECK-NEXT:    [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; CHECK-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = cmpxchg i64* [[TMP1]], i64 [[LOADED]], i64 4616189618054758400 seq_cst seq_cst, align 8
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
+; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
 ; CHECK:       atomicrmw.end:
-; CHECK-NEXT:    ret double [[TMP5]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64 [[NEWLOADED]] to double
+; CHECK-NEXT:    ret double [[TMP4]]
 ;
   %result = atomicrmw xchg double* %ptr, double 4.0 seq_cst
   ret double %result
@@ -23,19 +22,18 @@
 
 define double @atomic_xchg_f64_as1(double addrspace(1)* %ptr) nounwind {
 ; CHECK-LABEL: @atomic_xchg_f64_as1(
-; CHECK-NEXT:    [[TMP1:%.*]] = load double, double addrspace(1)* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double addrspace(1)* [[PTR:%.*]] to i64 addrspace(1)*
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64 addrspace(1)* [[TMP1]], align 8
 ; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
 ; CHECK:       atomicrmw.start:
-; CHECK-NEXT:    [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double addrspace(1)* [[PTR]] to i64 addrspace(1)*
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; CHECK-NEXT:    [[TMP4:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP3]], i64 4616189618054758400 seq_cst seq_cst
-; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; CHECK-NEXT:    [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; CHECK-NEXT:    [[LOADED:%.*]] = phi i64 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = cmpxchg i64 addrspace(1)* [[TMP1]], i64 [[LOADED]], i64 4616189618054758400 seq_cst seq_cst, align 8
+; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
+; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
 ; CHECK:       atomicrmw.end:
-; CHECK-NEXT:    ret double [[TMP5]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64 [[NEWLOADED]] to double
+; CHECK-NEXT:    ret double [[TMP4]]
 ;
   %result = atomicrmw xchg double addrspace(1)* %ptr, double 4.0 seq_cst
   ret double %result
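A note on the magic number in these X86 checks: 4616189618054758400 is simply the IEEE-754 bit pattern of the double constant 4.0 that the source exchanges in, exposed as an i64 literal (the cmpxchg new-value operand) once the xchg has been converted to integer form:

  4.0 = (-1)^0 x 1.0 x 2^2
  sign = 0, biased exponent = 2 + 1023 = 1025 = 0x401, significand field = 0
  bits = 0x401 << 52 = 0x4010000000000000 = 4616189618054758400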