diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -132,9 +132,7 @@
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
 
-  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
-    return isa<LoadInst>(I) || isa<StoreInst>(I);
-  }
+  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1746,3 +1746,21 @@
 bool LoongArchTargetLowering::isCheapToSpeculateCttz() const { return true; }
 
 bool LoongArchTargetLowering::isCheapToSpeculateCtlz() const { return true; }
+
+bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
+    const Instruction *I) const {
+  if (!Subtarget.is64Bit())
+    return isa<LoadInst>(I) || isa<StoreInst>(I);
+
+  if (isa<LoadInst>(I))
+    return true;
+
+  // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
+  // require fences because we can use amswap_db.[w/d].
+  if (isa<StoreInst>(I)) {
+    unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
+    return (Size == 8 || Size == 16);
+  }
+
+  return false;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -995,13 +995,48 @@
 defm : LdPat<atomic_load_16, LD_H>;
 defm : LdPat<atomic_load_32, LD_W>;
 
+class release_seqcst_store<PatFrag base>
+    : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getSuccessOrdering();
+  return isReleaseOrStronger(Ordering);
+}]>;
+
+class unordered_monotonic_store<PatFrag base>
+    : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getSuccessOrdering();
+  return !isReleaseOrStronger(Ordering);
+}]>;
+
+def atomic_store_release_seqcst_32 : release_seqcst_store<atomic_store_32>;
+def atomic_store_release_seqcst_64 : release_seqcst_store<atomic_store_64>;
+def atomic_store_unordered_monotonic_32
+    : unordered_monotonic_store<atomic_store_32>;
+def atomic_store_unordered_monotonic_64
+    : unordered_monotonic_store<atomic_store_64>;
+
 defm : StPat<atomic_store_8, ST_B, GPR, GRLenVT>;
 defm : StPat<atomic_store_16, ST_H, GPR, GRLenVT>;
-defm : StPat<atomic_store_32, ST_W, GPR, i32>, Requires<[IsLA32]>;
+defm : StPat<atomic_store_unordered_monotonic_32, ST_W, GPR, i32>,
+       Requires<[IsLA32]>;
+
+def PseudoAtomicStoreW : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk), []>,
+                         PseudoInstExpansion<(AMSWAP_DB_W R0,
+                                              GPR:$rk, GPR:$rj)>;
+
+def : Pat<(atomic_store_release_seqcst_32 GPR:$rj, GPR:$rk),
+          (PseudoAtomicStoreW GPR:$rj, GPR:$rk)>;
+
 let Predicates = [IsLA64] in {
+def PseudoAtomicStoreD : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk), []>,
+                         PseudoInstExpansion<(AMSWAP_DB_D R0,
+                                              GPR:$rk, GPR:$rj)>;
+
+def : Pat<(atomic_store_release_seqcst_64 GPR:$rj, GPR:$rk),
+          (PseudoAtomicStoreD GPR:$rj, GPR:$rk)>;
+
 defm : LdPat<atomic_load_64, LD_D>;
-defm : StPat<atomic_store_32, ST_W, GPR, i32>;
-defm : StPat<atomic_store_64, ST_D, GPR, i64>;
+defm : StPat<atomic_store_unordered_monotonic_32, ST_W, GPR, i32>;
+defm : StPat<atomic_store_unordered_monotonic_64, ST_D, GPR, i64>;
 } // Predicates = [IsLA64]
 
 /// Other pseudo-instructions
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
@@ -113,8 +113,7 @@
 ;
 ; LA64-LABEL: store_release_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    dbar 0
-; LA64-NEXT:    st.w $a1, $a0, 0
+; LA64-NEXT:    amswap_db.w $zero, $a1, $a0
 ; LA64-NEXT:    ret
   store atomic i32 %v, ptr %ptr release, align 4
   ret void
@@ -135,9 +134,207 @@
 ;
 ; LA64-LABEL: store_release_i64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    dbar 0
-; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    amswap_db.d $zero, $a1, $a0
 ; LA64-NEXT:    ret
   store atomic i64 %v, ptr %ptr release, align 8
   ret void
 }
+
+define void @store_unordered_i8(ptr %ptr, i8 signext %v) {
+; LA32-LABEL: store_unordered_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.b $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_unordered_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.b $a1, $a0, 0
+; LA64-NEXT:    ret
+  store atomic i8 %v, ptr %ptr unordered, align 1
+  ret void
+}
+
+define void @store_unordered_i16(ptr %ptr, i16 signext %v) {
+; LA32-LABEL: store_unordered_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.h $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_unordered_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.h $a1, $a0, 0
+; LA64-NEXT:    ret
+  store atomic i16 %v, ptr %ptr unordered, align 2
+  ret void
+}
+
+define void @store_unordered_i32(ptr %ptr, i32 signext %v) {
+; LA32-LABEL: store_unordered_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_unordered_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.w $a1, $a0, 0
+; LA64-NEXT:    ret
+  store atomic i32 %v, ptr %ptr unordered, align 4
+  ret void
+}
+
+define void @store_unordered_i64(ptr %ptr, i64 %v) {
+; LA32-LABEL: store_unordered_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    .cfi_def_cfa_offset 16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    move $a3, $zero
+; LA32-NEXT:    bl %plt(__atomic_store_8)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_unordered_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    ret
+  store atomic i64 %v, ptr %ptr unordered, align 8
+  ret void
+}
+
+define void @store_monotonic_i8(ptr %ptr, i8 signext %v) {
+; LA32-LABEL: store_monotonic_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.b $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_monotonic_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.b $a1, $a0, 0
+; LA64-NEXT:    ret
+  store atomic i8 %v, ptr %ptr monotonic, align 1
+  ret void
+}
+
+define void @store_monotonic_i16(ptr %ptr, i16 signext %v) {
+; LA32-LABEL: store_monotonic_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.h $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_monotonic_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.h $a1, $a0, 0
+; LA64-NEXT:    ret
+  store atomic i16 %v, ptr %ptr monotonic, align 2
+  ret void
+}
+
+define void @store_monotonic_i32(ptr %ptr, i32 signext %v) {
+; LA32-LABEL: store_monotonic_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_monotonic_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.w $a1, $a0, 0
+; LA64-NEXT:    ret
+  store atomic i32 %v, ptr %ptr monotonic, align 4
+  ret void
+}
+
+define void @store_monotonic_i64(ptr %ptr, i64 %v) {
+; LA32-LABEL: store_monotonic_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    .cfi_def_cfa_offset 16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    move $a3, $zero
+; LA32-NEXT:    bl %plt(__atomic_store_8)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_monotonic_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    ret
+  store atomic i64 %v, ptr %ptr monotonic, align 8
+  ret void
+}
+
+define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) {
+; LA32-LABEL: store_seq_cst_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    st.b $a1, $a0, 0
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_seq_cst_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    st.b $a1, $a0, 0
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ret
+  store atomic i8 %v, ptr %ptr seq_cst, align 1
+  ret void
+}
+
+define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) {
+; LA32-LABEL: store_seq_cst_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    st.h $a1, $a0, 0
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_seq_cst_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    st.h $a1, $a0, 0
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ret
+  store atomic i16 %v, ptr %ptr seq_cst, align 2
+  ret void
+}
+
+define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) {
+; LA32-LABEL: store_seq_cst_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_seq_cst_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    amswap_db.w $zero, $a1, $a0
+; LA64-NEXT:    ret
+  store atomic i32 %v, ptr %ptr seq_cst, align 4
+  ret void
+}
+
+define void @store_seq_cst_i64(ptr %ptr, i64 %v) {
+; LA32-LABEL: store_seq_cst_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    .cfi_def_cfa_offset 16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    ori $a3, $zero, 5
+; LA32-NEXT:    bl %plt(__atomic_store_8)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_seq_cst_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    amswap_db.d $zero, $a1, $a0
+; LA64-NEXT:    ret
+  store atomic i64 %v, ptr %ptr seq_cst, align 8
+  ret void
+}
diff --git a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll b/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll
--- a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll
+++ b/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll
@@ -98,8 +98,7 @@
 ; LA32-NEXT:    ret void
 ;
 ; LA64-LABEL: @store_release_i32(
-; LA64-NEXT:    fence release
-; LA64-NEXT:    store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 4
+; LA64-NEXT:    store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] release, align 4
 ; LA64-NEXT:    ret void
 ;
   store atomic i32 %v, ptr %ptr release, align 4
@@ -112,8 +111,7 @@
 ; LA32-NEXT:    ret void
 ;
 ; LA64-LABEL: @store_release_i64(
-; LA64-NEXT:    fence release
-; LA64-NEXT:    store atomic i64 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 8
+; LA64-NEXT:    store atomic i64 [[V:%.*]], ptr [[PTR:%.*]] release, align 8
 ; LA64-NEXT:    ret void
 ;
   store atomic i64 %v, ptr %ptr release, align 8