diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -111,9 +111,7 @@
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
 
-  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
-    return isa<LoadInst>(I) || isa<StoreInst>(I);
-  }
+  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1098,3 +1098,17 @@
     return false;
   return (Imm.isZero() || Imm.isExactlyValue(+1.0));
 }
+
+bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
+    const Instruction *I) const {
+  if (!Subtarget.is64Bit()) {
+    return isa<LoadInst>(I) || isa<StoreInst>(I);
+  } else {
+    unsigned Size = 0;
+    if (isa<StoreInst>(I))
+      Size = I->getOperand(0)->getType()->getIntegerBitWidth();
+    // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do
+    // not require fences because we can use amswap_db.[w/d].
+    return isa<LoadInst>(I) || (isa<StoreInst>(I) && (Size == 8 || Size == 16));
+  }
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -848,13 +848,48 @@
 defm : LdPat<atomic_load_16, LD_H>;
 defm : LdPat<atomic_load_32, LD_W>;
 
+class release_seqcst_store<PatFrag base>
+    : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getSuccessOrdering();
+  return isReleaseOrStronger(Ordering);
+}]>;
+
+class unordered_monotonic_store<PatFrag base>
+    : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getSuccessOrdering();
+  return !isReleaseOrStronger(Ordering);
+}]>;
+
+def atomic_store_release_seqcst_32 : release_seqcst_store<atomic_store_32>;
+def atomic_store_release_seqcst_64 : release_seqcst_store<atomic_store_64>;
+def atomic_store_unordered_monotonic_32
+    : unordered_monotonic_store<atomic_store_32>;
+def atomic_store_unordered_monotonic_64
+    : unordered_monotonic_store<atomic_store_64>;
+
 defm : StPat<atomic_store_8, ST_B, GPR, GRLenVT>;
 defm : StPat<atomic_store_16, ST_H, GPR, GRLenVT>;
-defm : StPat<atomic_store_32, ST_W, GPR, GRLenVT>, Requires<[IsLA32]>;
+defm : StPat<atomic_store_unordered_monotonic_32, ST_W, GPR, GRLenVT>,
+       Requires<[IsLA32]>;
+
+def PseudoAtomicStoreW : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk), []>,
+                         PseudoInstExpansion<(AMSWAP_DB_W R0,
+                                              GPR:$rk, GPR:$rj)>;
+
+def : Pat<(atomic_store_release_seqcst_32 GPR:$rj, GPR:$rk),
+          (PseudoAtomicStoreW GPR:$rj, GPR:$rk)>;
+
 let Predicates = [IsLA64] in {
+def PseudoAtomicStoreD : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk), []>,
+                         PseudoInstExpansion<(AMSWAP_DB_D R0,
+                                              GPR:$rk, GPR:$rj)>;
+
+def : Pat<(atomic_store_release_seqcst_64 GPR:$rj, GPR:$rk),
+          (PseudoAtomicStoreD GPR:$rj, GPR:$rk)>;
+
 defm : LdPat<atomic_load_64, LD_D>;
-defm : StPat<atomic_store_32, ST_W, GPR, GRLenVT>;
-defm : StPat<atomic_store_64, ST_D, GPR, GRLenVT>;
+defm : StPat<atomic_store_unordered_monotonic_32, ST_W, GPR, GRLenVT>;
+defm : StPat<atomic_store_unordered_monotonic_64, ST_D, GPR, GRLenVT>;
 } // Predicates = [IsLA64]
 
 /// Other pseudo-instructions
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
@@ -113,8 +113,7 @@
 ;
 ; LA64-LABEL: store_release_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    dbar 0
-; LA64-NEXT:    st.w $a0, $a1, 0
+; LA64-NEXT:    amswap_db.w $zero, $a1, $a0
 ; LA64-NEXT:    jirl $zero, $ra, 0
   store atomic i32 %v, ptr %ptr release, align 4
   ret void
@@ -135,9 +134,207 @@
 ;
 ; LA64-LABEL: store_release_i64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    dbar 0
-; LA64-NEXT:    st.d $a0, $a1, 0
+; LA64-NEXT:    amswap_db.d $zero, $a1, $a0
 ; LA64-NEXT:    jirl $zero, $ra, 0
   store atomic i64 %v, ptr %ptr release, align 8
   ret void
 }
+
+define void @store_unordered_i8(ptr %ptr, i8 signext %v) {
+; LA32-LABEL: store_unordered_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.b $a0, $a1, 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_unordered_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.b $a0, $a1, 0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i8 %v, ptr %ptr unordered, align 1
+  ret void
+}
+
+define void @store_unordered_i16(ptr %ptr, i16 signext %v) {
+; LA32-LABEL: store_unordered_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.h $a0, $a1, 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_unordered_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.h $a0, $a1, 0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i16 %v, ptr %ptr unordered, align 2
+  ret void
+}
+
+define void @store_unordered_i32(ptr %ptr, i32 signext %v) {
+; LA32-LABEL: store_unordered_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_unordered_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.w $a0, $a1, 0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i32 %v, ptr %ptr unordered, align 4
+  ret void
+}
+
+define void @store_unordered_i64(ptr %ptr, i64 %v) {
+; LA32-LABEL: store_unordered_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    .cfi_def_cfa_offset 16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    move $a3, $zero
+; LA32-NEXT:    bl __atomic_store_8
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_unordered_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.d $a0, $a1, 0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i64 %v, ptr %ptr unordered, align 8
+  ret void
+}
+
+define void @store_monotonic_i8(ptr %ptr, i8 signext %v) {
+; LA32-LABEL: store_monotonic_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.b $a0, $a1, 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_monotonic_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.b $a0, $a1, 0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i8 %v, ptr %ptr monotonic, align 1
+  ret void
+}
+
+define void @store_monotonic_i16(ptr %ptr, i16 signext %v) {
+; LA32-LABEL: store_monotonic_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.h $a0, $a1, 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_monotonic_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.h $a0, $a1, 0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i16 %v, ptr %ptr monotonic, align 2
+  ret void
+}
+
+define void @store_monotonic_i32(ptr %ptr, i32 signext %v) {
+; LA32-LABEL: store_monotonic_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_monotonic_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.w $a0, $a1, 0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i32 %v, ptr %ptr monotonic, align 4
+  ret void
+}
+
+define void @store_monotonic_i64(ptr %ptr, i64 %v) {
+; LA32-LABEL: store_monotonic_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    .cfi_def_cfa_offset 16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    move $a3, $zero
+; LA32-NEXT:    bl __atomic_store_8
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_monotonic_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.d $a0, $a1, 0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i64 %v, ptr %ptr monotonic, align 8
+  ret void
+}
+
+define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) {
+; LA32-LABEL: store_seq_cst_i8:
+; LA32:       # %bb.0:
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    st.b $a0, $a1, 0
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_seq_cst_i8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    st.b $a0, $a1, 0
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i8 %v, ptr %ptr seq_cst, align 1
+  ret void
+}
+
+define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) {
+; LA32-LABEL: store_seq_cst_i16:
+; LA32:       # %bb.0:
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    st.h $a0, $a1, 0
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_seq_cst_i16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    st.h $a0, $a1, 0
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i16 %v, ptr %ptr seq_cst, align 2
+  ret void
+}
+
+define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) {
+; LA32-LABEL: store_seq_cst_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    st.w $a0, $a1, 0
+; LA32-NEXT:    dbar 0
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_seq_cst_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    amswap_db.w $zero, $a1, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i32 %v, ptr %ptr seq_cst, align 4
+  ret void
+}
+
+define void @store_seq_cst_i64(ptr %ptr, i64 %v) {
+; LA32-LABEL: store_seq_cst_i64:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    .cfi_def_cfa_offset 16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    ori $a3, $zero, 5
+; LA32-NEXT:    bl __atomic_store_8
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    jirl $zero, $ra, 0
+;
+; LA64-LABEL: store_seq_cst_i64:
+; LA64:       # %bb.0:
+; LA64-NEXT:    amswap_db.d $zero, $a1, $a0
+; LA64-NEXT:    jirl $zero, $ra, 0
+  store atomic i64 %v, ptr %ptr seq_cst, align 8
+  ret void
+}
diff --git a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll b/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll
--- a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll
+++ b/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll
@@ -98,8 +98,7 @@
 ; LA32-NEXT:    ret void
 ;
 ; LA64-LABEL: @store_release_i32(
-; LA64-NEXT:    fence release
-; LA64-NEXT:    store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 4
+; LA64-NEXT:    store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] release, align 4
 ; LA64-NEXT:    ret void
 ;
   store atomic i32 %v, ptr %ptr release, align 4
@@ -112,8 +111,7 @@
 ; LA32-NEXT:    ret void
 ;
 ; LA64-LABEL: @store_release_i64(
-; LA64-NEXT:    fence release
-; LA64-NEXT:    store atomic i64 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 8
+; LA64-NEXT:    store atomic i64 [[V:%.*]], ptr [[PTR:%.*]] release, align 8
 ; LA64-NEXT:    ret void
 ;
   store atomic i64 %v, ptr %ptr release, align 8