diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2162,6 +2162,12 @@ TargetLowering::AtomicExpansionKind
 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   // TODO: Add more AtomicRMWInst that needs to be extended.
+
+  // Since floating-point operations require a non-trivial set of data
+  // operations, use CmpXChg to expand them.
+  if (AI->isFloatingPointOperation())
+    return AtomicExpansionKind::CmpXChg;
+
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   if (Size == 8 || Size == 16)
     return AtomicExpansionKind::MaskedIntrinsic;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
@@ -0,0 +1,691 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D
+
+;; Add more test cases after supporting different AtomicOrdering.
+
+define float @float_fadd_acquire(ptr %p) nounwind {
+; LA64F-LABEL: float_fadd_acquire:
+; LA64F: # %bb.0:
+; LA64F-NEXT: fld.s $fa0, $a0, 0
+; LA64F-NEXT: addi.w $a1, $zero, 1
+; LA64F-NEXT: movgr2fr.w $fa1, $a1
+; LA64F-NEXT: .LBB0_1: # %atomicrmw.start
+; LA64F-NEXT: # =>This Loop Header: Depth=1
+; LA64F-NEXT: # Child Loop BB0_3 Depth 2
+; LA64F-NEXT: ffint.s.w $fa2, $fa1
+; LA64F-NEXT: fadd.s $fa2, $fa0, $fa2
+; LA64F-NEXT: movfr2gr.s $a1, $fa2
+; LA64F-NEXT: movfr2gr.s $a2, $fa0
+; LA64F-NEXT: .LBB0_3: # %atomicrmw.start
+; LA64F-NEXT: # Parent Loop BB0_1 Depth=1
+; LA64F-NEXT: # => This Inner Loop Header: Depth=2
+; LA64F-NEXT: ll.w $a3, $a0, 0
+; LA64F-NEXT: bne $a3, $a2, .LBB0_5
+; LA64F-NEXT: # %bb.4: # %atomicrmw.start
+; LA64F-NEXT: # in Loop: Header=BB0_3 Depth=2
+; LA64F-NEXT: dbar 0
+; LA64F-NEXT: move $a4, $a1
+; LA64F-NEXT: sc.w $a4, $a0, 0
+; LA64F-NEXT: beqz $a4, .LBB0_3
+; LA64F-NEXT: b .LBB0_6
+; LA64F-NEXT: .LBB0_5: # %atomicrmw.start
+; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1
+; LA64F-NEXT: dbar 1792
+; LA64F-NEXT: .LBB0_6: # %atomicrmw.start
+; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1
+; LA64F-NEXT: movgr2fr.w $fa0, $a3
+; LA64F-NEXT: addi.w $a1, $a2, 0
+; LA64F-NEXT: bne $a3, $a1, .LBB0_1
+; LA64F-NEXT: # %bb.2: # %atomicrmw.end
+; LA64F-NEXT: ret
+;
+; LA64D-LABEL: float_fadd_acquire:
+; LA64D: # %bb.0:
+; LA64D-NEXT: fld.s $fa0, $a0, 0
+; LA64D-NEXT: addi.w $a1, $zero, 1
+; LA64D-NEXT: movgr2fr.w $fa1, $a1
+; LA64D-NEXT: .LBB0_1: # %atomicrmw.start
+; LA64D-NEXT: # =>This Loop Header: Depth=1
+; LA64D-NEXT: # Child Loop BB0_3 Depth 2
+; LA64D-NEXT: ffint.s.w $fa2, $fa1
+; LA64D-NEXT: fadd.s $fa2, $fa0, $fa2
+; LA64D-NEXT: movfr2gr.s $a1, $fa2
+; LA64D-NEXT: movfr2gr.s $a2, $fa0
+; LA64D-NEXT: .LBB0_3: # %atomicrmw.start
+; LA64D-NEXT: # Parent Loop BB0_1 Depth=1
+; LA64D-NEXT: # => This Inner Loop Header: Depth=2
+; LA64D-NEXT: ll.w $a3, $a0, 0
+; LA64D-NEXT: bne $a3, $a2, .LBB0_5
+; LA64D-NEXT: # %bb.4: # %atomicrmw.start
+; LA64D-NEXT: # in Loop: Header=BB0_3 Depth=2
+; LA64D-NEXT: dbar 0
+; LA64D-NEXT: move $a4, $a1
+; LA64D-NEXT: sc.w $a4, $a0, 0
+; LA64D-NEXT: beqz $a4, .LBB0_3
+; LA64D-NEXT: b .LBB0_6
+; LA64D-NEXT: .LBB0_5: # %atomicrmw.start
+; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB0_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: addi.w $a1, $a2, 0 +; LA64D-NEXT: bne $a3, $a1, .LBB0_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 + ret float %v +} + +define float @float_fsub_acquire(ptr %p) nounwind { +; LA64F-LABEL: float_fsub_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) +; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI1_0) +; LA64F-NEXT: fld.s $fa1, $a1, 0 +; LA64F-NEXT: .LBB1_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB1_3 Depth 2 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: .LBB1_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB1_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB1_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB1_3 Depth=2 +; LA64F-NEXT: dbar 0 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB1_3 +; LA64F-NEXT: b .LBB1_6 +; LA64F-NEXT: .LBB1_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB1_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: addi.w $a1, $a2, 0 +; LA64F-NEXT: bne $a3, $a1, .LBB1_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fsub_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) +; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI1_0) +; LA64D-NEXT: fld.s $fa1, $a1, 0 +; LA64D-NEXT: .LBB1_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB1_3 Depth 2 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: .LBB1_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB1_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB1_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB1_3 Depth=2 +; LA64D-NEXT: dbar 0 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB1_3 +; LA64D-NEXT: b .LBB1_6 +; LA64D-NEXT: .LBB1_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB1_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: addi.w $a1, $a2, 0 +; LA64D-NEXT: bne $a3, $a1, .LBB1_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 + ret float %v +} + +define float @float_fmin_acquire(ptr %p) nounwind { +; LA64F-LABEL: float_fmin_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: .LBB2_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB2_3 Depth 2 +; LA64F-NEXT: ffint.s.w $fa2, $fa1 +; LA64F-NEXT: fmax.s $fa3, $fa0, $fa0 +; LA64F-NEXT: fmin.s $fa2, $fa3, $fa2 +; 
LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: .LBB2_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB2_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB2_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB2_3 Depth=2 +; LA64F-NEXT: dbar 0 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB2_3 +; LA64F-NEXT: b .LBB2_6 +; LA64F-NEXT: .LBB2_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB2_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: addi.w $a1, $a2, 0 +; LA64F-NEXT: bne $a3, $a1, .LBB2_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmin_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: .LBB2_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB2_3 Depth 2 +; LA64D-NEXT: ffint.s.w $fa2, $fa1 +; LA64D-NEXT: fmax.s $fa3, $fa0, $fa0 +; LA64D-NEXT: fmin.s $fa2, $fa3, $fa2 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: .LBB2_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB2_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB2_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB2_3 Depth=2 +; LA64D-NEXT: dbar 0 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB2_3 +; LA64D-NEXT: b .LBB2_6 +; LA64D-NEXT: .LBB2_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB2_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: addi.w $a1, $a2, 0 +; LA64D-NEXT: bne $a3, $a1, .LBB2_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4 + ret float %v +} + +define float @float_fmax_acquire(ptr %p) nounwind { +; LA64F-LABEL: float_fmax_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: fld.s $fa0, $a0, 0 +; LA64F-NEXT: addi.w $a1, $zero, 1 +; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: .LBB3_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Loop Header: Depth=1 +; LA64F-NEXT: # Child Loop BB3_3 Depth 2 +; LA64F-NEXT: ffint.s.w $fa2, $fa1 +; LA64F-NEXT: fmax.s $fa3, $fa0, $fa0 +; LA64F-NEXT: fmax.s $fa2, $fa3, $fa2 +; LA64F-NEXT: movfr2gr.s $a1, $fa2 +; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: .LBB3_3: # %atomicrmw.start +; LA64F-NEXT: # Parent Loop BB3_1 Depth=1 +; LA64F-NEXT: # => This Inner Loop Header: Depth=2 +; LA64F-NEXT: ll.w $a3, $a0, 0 +; LA64F-NEXT: bne $a3, $a2, .LBB3_5 +; LA64F-NEXT: # %bb.4: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB3_3 Depth=2 +; LA64F-NEXT: dbar 0 +; LA64F-NEXT: move $a4, $a1 +; LA64F-NEXT: sc.w $a4, $a0, 0 +; LA64F-NEXT: beqz $a4, .LBB3_3 +; LA64F-NEXT: b .LBB3_6 +; LA64F-NEXT: .LBB3_5: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LA64F-NEXT: dbar 1792 +; LA64F-NEXT: .LBB3_6: # %atomicrmw.start +; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LA64F-NEXT: movgr2fr.w $fa0, $a3 +; LA64F-NEXT: addi.w $a1, $a2, 0 +; LA64F-NEXT: bne $a3, $a1, .LBB3_1 +; LA64F-NEXT: # %bb.2: # 
%atomicrmw.end +; LA64F-NEXT: ret +; +; LA64D-LABEL: float_fmax_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: fld.s $fa0, $a0, 0 +; LA64D-NEXT: addi.w $a1, $zero, 1 +; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: .LBB3_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Loop Header: Depth=1 +; LA64D-NEXT: # Child Loop BB3_3 Depth 2 +; LA64D-NEXT: ffint.s.w $fa2, $fa1 +; LA64D-NEXT: fmax.s $fa3, $fa0, $fa0 +; LA64D-NEXT: fmax.s $fa2, $fa3, $fa2 +; LA64D-NEXT: movfr2gr.s $a1, $fa2 +; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: .LBB3_3: # %atomicrmw.start +; LA64D-NEXT: # Parent Loop BB3_1 Depth=1 +; LA64D-NEXT: # => This Inner Loop Header: Depth=2 +; LA64D-NEXT: ll.w $a3, $a0, 0 +; LA64D-NEXT: bne $a3, $a2, .LBB3_5 +; LA64D-NEXT: # %bb.4: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB3_3 Depth=2 +; LA64D-NEXT: dbar 0 +; LA64D-NEXT: move $a4, $a1 +; LA64D-NEXT: sc.w $a4, $a0, 0 +; LA64D-NEXT: beqz $a4, .LBB3_3 +; LA64D-NEXT: b .LBB3_6 +; LA64D-NEXT: .LBB3_5: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LA64D-NEXT: dbar 1792 +; LA64D-NEXT: .LBB3_6: # %atomicrmw.start +; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LA64D-NEXT: movgr2fr.w $fa0, $a3 +; LA64D-NEXT: addi.w $a1, $a2, 0 +; LA64D-NEXT: bne $a3, $a1, .LBB3_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 + ret float %v +} + +define double @double_fadd_acquire(ptr %p) nounwind { +; LA64F-LABEL: double_fadd_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.wu $a0, $a0, 0 +; LA64F-NEXT: ld.wu $a1, $fp, 4 +; LA64F-NEXT: slli.d $a1, $a1, 32 +; LA64F-NEXT: or $a0, $a1, $a0 +; LA64F-NEXT: ori $s0, $zero, 8 +; LA64F-NEXT: addi.d $s1, $sp, 8 +; LA64F-NEXT: addi.d $s2, $sp, 0 +; LA64F-NEXT: ori $s3, $zero, 2 +; LA64F-NEXT: .LBB4_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: lu52i.d $a1, $zero, 1023 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s1 +; LA64F-NEXT: move $a3, $s2 +; LA64F-NEXT: move $a4, $s3 +; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB4_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fadd_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 
32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: ld.wu $a0, $a0, 0 +; LA64D-NEXT: ld.wu $a1, $fp, 4 +; LA64D-NEXT: slli.d $a1, $a1, 32 +; LA64D-NEXT: or $a0, $a1, $a0 +; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fs0, $a0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .LBB4_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: ffint.d.l $fa1, $fs0 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB4_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fadd ptr %p, double 1.0 acquire, align 4 + ret double %v +} + +define double @double_fsub_acquire(ptr %p) nounwind { +; LA64F-LABEL: double_fsub_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.wu $a0, $a0, 0 +; LA64F-NEXT: ld.wu $a1, $fp, 4 +; LA64F-NEXT: slli.d $a1, $a1, 32 +; LA64F-NEXT: or $a0, $a1, $a0 +; LA64F-NEXT: ori $s0, $zero, 8 +; LA64F-NEXT: addi.d $s1, $sp, 8 +; LA64F-NEXT: addi.d $s2, $sp, 0 +; LA64F-NEXT: ori $s3, $zero, 2 +; LA64F-NEXT: .LBB5_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: lu52i.d $a1, $zero, -1025 +; LA64F-NEXT: bl %plt(__adddf3) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s1 +; LA64F-NEXT: move $a3, $s2 +; LA64F-NEXT: move $a4, $s3 +; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB5_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fsub_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; 
LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: ld.wu $a0, $a0, 0 +; LA64D-NEXT: ld.wu $a1, $fp, 4 +; LA64D-NEXT: slli.d $a1, $a1, 32 +; LA64D-NEXT: or $a0, $a1, $a0 +; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI5_0) +; LA64D-NEXT: fld.d $fs0, $a0, 0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .LBB5_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB5_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fsub ptr %p, double 1.0 acquire, align 4 + ret double %v +} + +define double @double_fmin_acquire(ptr %p) nounwind { +; LA64F-LABEL: double_fmin_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.wu $a0, $a0, 0 +; LA64F-NEXT: ld.wu $a1, $fp, 4 +; LA64F-NEXT: slli.d $a1, $a1, 32 +; LA64F-NEXT: or $a0, $a1, $a0 +; LA64F-NEXT: ori $s0, $zero, 8 +; LA64F-NEXT: addi.d $s1, $sp, 8 +; LA64F-NEXT: addi.d $s2, $sp, 0 +; LA64F-NEXT: ori $s3, $zero, 2 +; LA64F-NEXT: .LBB6_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: lu52i.d $a1, $zero, 1023 +; LA64F-NEXT: bl %plt(fmin) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s1 +; LA64F-NEXT: move $a3, $s2 +; LA64F-NEXT: move $a4, $s3 +; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB6_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; 
+; LA64D-LABEL: double_fmin_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: ld.wu $a0, $a0, 0 +; LA64D-NEXT: ld.wu $a1, $fp, 4 +; LA64D-NEXT: slli.d $a1, $a1, 32 +; LA64D-NEXT: or $a0, $a1, $a0 +; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fs0, $a0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .LBB6_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: ffint.d.l $fa1, $fs0 +; LA64D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB6_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fmin ptr %p, double 1.0 acquire, align 4 + ret double %v +} + +define double @double_fmax_acquire(ptr %p) nounwind { +; LA64F-LABEL: double_fmax_acquire: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -64 +; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: move $fp, $a0 +; LA64F-NEXT: ld.wu $a0, $a0, 0 +; LA64F-NEXT: ld.wu $a1, $fp, 4 +; LA64F-NEXT: slli.d $a1, $a1, 32 +; LA64F-NEXT: or $a0, $a1, $a0 +; LA64F-NEXT: ori $s0, $zero, 8 +; LA64F-NEXT: addi.d $s1, $sp, 8 +; LA64F-NEXT: addi.d $s2, $sp, 0 +; LA64F-NEXT: ori $s3, $zero, 2 +; LA64F-NEXT: .LBB7_1: # %atomicrmw.start +; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: lu52i.d $a1, $zero, 1023 +; LA64F-NEXT: bl %plt(fmax) +; LA64F-NEXT: st.d $a0, $sp, 0 +; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: move $a1, $fp +; LA64F-NEXT: move $a2, $s1 +; LA64F-NEXT: move $a3, $s2 +; LA64F-NEXT: move $a4, $s3 +; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: bl %plt(__atomic_compare_exchange) +; LA64F-NEXT: move $a1, $a0 +; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: beqz $a1, .LBB7_1 +; LA64F-NEXT: # %bb.2: # %atomicrmw.end +; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload +; 
LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ret +; +; LA64D-LABEL: double_fmax_acquire: +; LA64D: # %bb.0: +; LA64D-NEXT: addi.d $sp, $sp, -80 +; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill +; LA64D-NEXT: move $fp, $a0 +; LA64D-NEXT: ld.wu $a0, $a0, 0 +; LA64D-NEXT: ld.wu $a1, $fp, 4 +; LA64D-NEXT: slli.d $a1, $a1, 32 +; LA64D-NEXT: or $a0, $a1, $a0 +; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: movgr2fr.d $fs0, $a0 +; LA64D-NEXT: ori $s0, $zero, 8 +; LA64D-NEXT: addi.d $s1, $sp, 16 +; LA64D-NEXT: addi.d $s2, $sp, 8 +; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .LBB7_1: # %atomicrmw.start +; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64D-NEXT: fst.d $fa0, $sp, 16 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: ffint.d.l $fa1, $fs0 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA64D-NEXT: fst.d $fa0, $sp, 8 +; LA64D-NEXT: move $a0, $s0 +; LA64D-NEXT: move $a1, $fp +; LA64D-NEXT: move $a2, $s1 +; LA64D-NEXT: move $a3, $s2 +; LA64D-NEXT: move $a4, $s3 +; LA64D-NEXT: move $a5, $s3 +; LA64D-NEXT: bl %plt(__atomic_compare_exchange) +; LA64D-NEXT: fld.d $fa0, $sp, 16 +; LA64D-NEXT: beqz $a0, .LBB7_1 +; LA64D-NEXT: # %bb.2: # %atomicrmw.end +; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64D-NEXT: addi.d $sp, $sp, 80 +; LA64D-NEXT: ret + %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4 + ret double %v +} diff --git a/llvm/test/Transforms/AtomicExpand/LoongArch/atomicrmw-fp.ll b/llvm/test/Transforms/AtomicExpand/LoongArch/atomicrmw-fp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/LoongArch/atomicrmw-fp.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --mtriple=loongarch64 --atomic-expand --mattr=+d %s | FileCheck %s + +define float @atomicrmw_fadd_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fadd_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label 
[[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP5]] +; + %res = atomicrmw fadd ptr %ptr, float %value seq_cst + ret float %res +} + +define float @atomicrmw_fsub_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fsub_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP5]] +; + %res = atomicrmw fsub ptr %ptr, float %value seq_cst + ret float %res +} + +define float @atomicrmw_fmin_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fmin_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fmin ptr %ptr, float %value seq_cst + ret float %res +} + +define float @atomicrmw_fmax_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fmax_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fmax ptr %ptr, float %value seq_cst + ret float %res +} + +define double @atomicrmw_fadd_double(ptr %ptr, double %value) { +; CHECK-LABEL: 
@atomicrmw_fadd_double( +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP5]] +; + %res = atomicrmw fadd ptr %ptr, double %value seq_cst + ret double %res +} + +define double @atomicrmw_fsub_double(ptr %ptr, double %value) { +; CHECK-LABEL: @atomicrmw_fsub_double( +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP3]], i64 [[TMP2]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP5]] +; + %res = atomicrmw fsub ptr %ptr, double %value seq_cst + ret double %res +} + +define double @atomicrmw_fmin_double(ptr %ptr, double %value) { +; CHECK-LABEL: @atomicrmw_fmin_double( +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP6]] +; + %res = atomicrmw fmin ptr %ptr, double %value seq_cst + ret double %res +} + +define double @atomicrmw_fmax_double(ptr %ptr, double %value) { +; CHECK-LABEL: @atomicrmw_fmax_double( +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], 
[[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP6]] +; + %res = atomicrmw fmax ptr %ptr, double %value seq_cst + ret double %res +}
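
For reference, the expansion enabled by the ISelLowering change always has the same shape: load the current value, compute the FP result, then retry a cmpxchg on the raw integer bits until it succeeds. Below is a minimal sketch of that IR for the float fadd case, mirroring the CHECK lines of the AtomicExpand test above; the function, value, and label names here are illustrative, not the pass's autogenerated ones.

define float @fadd_expansion_sketch(ptr %ptr, float %value) {
entry:
  %init = load float, ptr %ptr, align 4
  br label %atomicrmw.start

atomicrmw.start:
  ; Carry the last observed value around the loop.
  %loaded = phi float [ %init, %entry ], [ %newloaded.fp, %atomicrmw.start ]
  %new = fadd float %loaded, %value
  ; The compare-and-swap operates on the raw i32 bits of the float.
  %new.bits = bitcast float %new to i32
  %loaded.bits = bitcast float %loaded to i32
  %pair = cmpxchg ptr %ptr, i32 %loaded.bits, i32 %new.bits seq_cst seq_cst, align 4
  %success = extractvalue { i32, i1 } %pair, 1
  %newloaded = extractvalue { i32, i1 } %pair, 0
  %newloaded.fp = bitcast i32 %newloaded to float
  br i1 %success, label %atomicrmw.end, label %atomicrmw.start

atomicrmw.end:
  ret float %newloaded.fp
}

On LA64 the cmpxchg itself is then lowered to an ll.w/sc.w retry loop, which is why the CodeGen checks above show the FP update loop (Depth=1) wrapped around an inner LL/SC loop (Depth=2).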