diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -17495,12 +17495,10 @@ TargetLowering::AtomicExpansionKind PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { - if (AI->isFloatingPointOperation()) - return AtomicExpansionKind::None; unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128) return AtomicExpansionKind::MaskedIntrinsic; - return AtomicExpansionKind::None; + return TargetLowering::shouldExpandAtomicRMWInIR(AI); } TargetLowering::AtomicExpansionKind @@ -17511,7 +17509,7 @@ ->getPrimitiveSizeInBits(); if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128) return AtomicExpansionKind::MaskedIntrinsic; - return AtomicExpansionKind::None; + return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI); } static Intrinsic::ID diff --git a/llvm/test/CodeGen/PowerPC/atomic-float.ll b/llvm/test/CodeGen/PowerPC/atomic-float.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/atomic-float.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ +; RUN: < %s | FileCheck --check-prefix=CHECK-64 %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown \ +; RUN: < %s | FileCheck --check-prefix=CHECK-32 %s + +define float @test_add(float* %ptr, float %incr) { +; CHECK-64-LABEL: test_add: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: sync +; CHECK-64-NEXT: lfs 0, 0(3) +; CHECK-64-NEXT: b .LBB0_3 +; CHECK-64-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-64-NEXT: # +; CHECK-64-NEXT: stwcx. 5, 0, 3 +; CHECK-64-NEXT: .LBB0_2: # %atomicrmw.start +; CHECK-64-NEXT: # +; CHECK-64-NEXT: stw 5, -4(1) +; CHECK-64-NEXT: cmplw 5, 4 +; CHECK-64-NEXT: lfs 0, -4(1) +; CHECK-64-NEXT: beq 0, .LBB0_6 +; CHECK-64-NEXT: .LBB0_3: # %atomicrmw.start +; CHECK-64-NEXT: # =>This Loop Header: Depth=1 +; CHECK-64-NEXT: # Child Loop BB0_4 Depth 2 +; CHECK-64-NEXT: fadds 2, 0, 1 +; CHECK-64-NEXT: stfs 2, -8(1) +; CHECK-64-NEXT: stfs 0, -12(1) +; CHECK-64-NEXT: lwz 6, -8(1) +; CHECK-64-NEXT: lwz 4, -12(1) +; CHECK-64-NEXT: .LBB0_4: # %atomicrmw.start +; CHECK-64-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-64-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-64-NEXT: lwarx 5, 0, 3 +; CHECK-64-NEXT: cmpw 4, 5 +; CHECK-64-NEXT: bne 0, .LBB0_1 +; CHECK-64-NEXT: # %bb.5: # %atomicrmw.start +; CHECK-64-NEXT: # +; CHECK-64-NEXT: stwcx. 6, 0, 3 +; CHECK-64-NEXT: bne 0, .LBB0_4 +; CHECK-64-NEXT: b .LBB0_2 +; CHECK-64-NEXT: .LBB0_6: # %atomicrmw.end +; CHECK-64-NEXT: fmr 1, 0 +; CHECK-64-NEXT: lwsync +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_add: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stwu 1, -32(1) +; CHECK-32-NEXT: .cfi_def_cfa_offset 32 +; CHECK-32-NEXT: sync +; CHECK-32-NEXT: lfs 0, 0(3) +; CHECK-32-NEXT: b .LBB0_3 +; CHECK-32-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-32-NEXT: # +; CHECK-32-NEXT: stwcx. 5, 0, 3 +; CHECK-32-NEXT: .LBB0_2: # %atomicrmw.start +; CHECK-32-NEXT: # +; CHECK-32-NEXT: stw 5, 28(1) +; CHECK-32-NEXT: cmplw 5, 4 +; CHECK-32-NEXT: lfs 0, 28(1) +; CHECK-32-NEXT: beq 0, .LBB0_6 +; CHECK-32-NEXT: .LBB0_3: # %atomicrmw.start +; CHECK-32-NEXT: # =>This Loop Header: Depth=1 +; CHECK-32-NEXT: # Child Loop BB0_4 Depth 2 +; CHECK-32-NEXT: fadds 2, 0, 1 +; CHECK-32-NEXT: stfs 2, 24(1) +; CHECK-32-NEXT: stfs 0, 20(1) +; CHECK-32-NEXT: lwz 6, 24(1) +; CHECK-32-NEXT: lwz 4, 20(1) +; CHECK-32-NEXT: .LBB0_4: # %atomicrmw.start +; CHECK-32-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-32-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-32-NEXT: lwarx 5, 0, 3 +; CHECK-32-NEXT: cmpw 4, 5 +; CHECK-32-NEXT: bne 0, .LBB0_1 +; CHECK-32-NEXT: # %bb.5: # %atomicrmw.start +; CHECK-32-NEXT: # +; CHECK-32-NEXT: stwcx. 6, 0, 3 +; CHECK-32-NEXT: bne 0, .LBB0_4 +; CHECK-32-NEXT: b .LBB0_2 +; CHECK-32-NEXT: .LBB0_6: # %atomicrmw.end +; CHECK-32-NEXT: fmr 1, 0 +; CHECK-32-NEXT: lwsync +; CHECK-32-NEXT: addi 1, 1, 32 +; CHECK-32-NEXT: blr +entry: + %r = atomicrmw fadd float* %ptr, float %incr seq_cst + ret float %r +} diff --git a/llvm/test/Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll b/llvm/test/Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=powerpc64-unknown-unknown -atomic-expand %s | FileCheck %s + +define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fadd_f32( +; CHECK-NEXT: call void @llvm.ppc.sync() +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: call void @llvm.ppc.lwsync() +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fadd float* %ptr, float %value seq_cst + ret float %res +} + +define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fsub_f32( +; CHECK-NEXT: call void @llvm.ppc.sync() +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: call void @llvm.ppc.lwsync() +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fsub float* %ptr, float %value seq_cst + ret float %res +} +