diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -223,6 +223,7 @@ MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; + SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -128,6 +128,8 @@ setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); } + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + static const ISD::CondCode FPCCToExpand[] = { ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, ISD::SETGE, ISD::SETNE, ISD::SETGT}; @@ -213,6 +215,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { + case ISD::ATOMIC_FENCE: + return lowerATOMIC_FENCE(Op, DAG); case ISD::EH_DWARF_CFA: return lowerEH_DWARF_CFA(Op, DAG); case ISD::GlobalAddress: @@ -257,6 +261,22 @@ return SDValue(); } +SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SyncScope::ID FenceSSID = + static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); + + // singlethread fences only synchronize with signal handlers on the same + // thread and thus only need to preserve instruction order, not actually + // enforce memory ordering. + if (FenceSSID == SyncScope::SingleThread) + // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
+ return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); + + return Op; +} + SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -1512,7 +1512,29 @@ /// Atomic loads and stores -def : Pat<(atomic_fence timm, timm), (DBAR 0)>; +// DBAR hint encoding for LA664 and later micro-architectures, paraphrased from +// the Linux patch revealing it [1]: +// +// - Bit 4: kind of constraint (0: completion, 1: ordering) +// - Bit 3: barrier for previous read (0: true, 1: false) +// - Bit 2: barrier for previous write (0: true, 1: false) +// - Bit 1: barrier for succeeding read (0: true, 1: false) +// - Bit 0: barrier for succeeding write (0: true, 1: false) +// +// Hint 0x700: barrier for "read after read" from the same address, which is +// e.g. needed by LL-SC loops on older models. (DBAR 0x700 behaves the same as +// nop if such reordering is disabled on supporting newer models.) +// +// [1]: https://lore.kernel.org/loongarch/20230516124536.535343-1-chenhuacai@loongson.cn/ +// +// Implementations without support for the finer-granularity hints simply treat +// all as the full barrier (DBAR 0), so we can unconditionally start emitting the +// more precise hints right away. 
+ +def : Pat<(atomic_fence 4, timm), (DBAR 0b10100)>; // acquire +def : Pat<(atomic_fence 5, timm), (DBAR 0b10010)>; // release +def : Pat<(atomic_fence 6, timm), (DBAR 0b10000)>; // acqrel +def : Pat<(atomic_fence 7, timm), (DBAR 0)>; // seqcst defm : LdPat<atomic_load_8, LD_B>; defm : LdPat<atomic_load_16, LD_H>; diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll @@ -5,12 +5,12 @@ define void @fence_acquire() nounwind { ; LA32-LABEL: fence_acquire: ; LA32: # %bb.0: -; LA32-NEXT: dbar 0 +; LA32-NEXT: dbar 20 ; LA32-NEXT: ret ; ; LA64-LABEL: fence_acquire: ; LA64: # %bb.0: -; LA64-NEXT: dbar 0 +; LA64-NEXT: dbar 20 ; LA64-NEXT: ret fence acquire ret void @@ -19,12 +19,12 @@ define void @fence_release() nounwind { ; LA32-LABEL: fence_release: ; LA32: # %bb.0: -; LA32-NEXT: dbar 0 +; LA32-NEXT: dbar 18 ; LA32-NEXT: ret ; ; LA64-LABEL: fence_release: ; LA64: # %bb.0: -; LA64-NEXT: dbar 0 +; LA64-NEXT: dbar 18 ; LA64-NEXT: ret fence release ret void @@ -33,12 +33,12 @@ define void @fence_acq_rel() nounwind { ; LA32-LABEL: fence_acq_rel: ; LA32: # %bb.0: -; LA32-NEXT: dbar 0 +; LA32-NEXT: dbar 16 ; LA32-NEXT: ret ; ; LA64-LABEL: fence_acq_rel: ; LA64: # %bb.0: -; LA64-NEXT: dbar 0 +; LA64-NEXT: dbar 16 ; LA64-NEXT: ret fence acq_rel ret void diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll --- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll @@ -6,13 +6,13 @@ ; LA32-LABEL: load_acquire_i8: ; LA32: # %bb.0: ; LA32-NEXT: ld.b $a0, $a0, 0 -; LA32-NEXT: dbar 0 +; LA32-NEXT: dbar 20 ; LA32-NEXT: ret ; ; LA64-LABEL: load_acquire_i8: ; LA64: # %bb.0: ; LA64-NEXT: ld.b $a0, $a0, 0 -; LA64-NEXT: dbar 0 +; LA64-NEXT: dbar 20 ; LA64-NEXT: ret %val = load atomic i8, 
ptr %ptr acquire, align 1 ret i8 %val @@ -22,13 +22,13 @@ ; LA32-LABEL: load_acquire_i16: ; LA32: # %bb.0: ; LA32-NEXT: ld.h $a0, $a0, 0 -; LA32-NEXT: dbar 0 +; LA32-NEXT: dbar 20 ; LA32-NEXT: ret ; ; LA64-LABEL: load_acquire_i16: ; LA64: # %bb.0: ; LA64-NEXT: ld.h $a0, $a0, 0 -; LA64-NEXT: dbar 0 +; LA64-NEXT: dbar 20 ; LA64-NEXT: ret %val = load atomic i16, ptr %ptr acquire, align 2 ret i16 %val @@ -38,13 +38,13 @@ ; LA32-LABEL: load_acquire_i32: ; LA32: # %bb.0: ; LA32-NEXT: ld.w $a0, $a0, 0 -; LA32-NEXT: dbar 0 +; LA32-NEXT: dbar 20 ; LA32-NEXT: ret ; ; LA64-LABEL: load_acquire_i32: ; LA64: # %bb.0: ; LA64-NEXT: ld.w $a0, $a0, 0 -; LA64-NEXT: dbar 0 +; LA64-NEXT: dbar 20 ; LA64-NEXT: ret %val = load atomic i32, ptr %ptr acquire, align 4 ret i32 %val @@ -66,7 +66,7 @@ ; LA64-LABEL: load_acquire_i64: ; LA64: # %bb.0: ; LA64-NEXT: ld.d $a0, $a0, 0 -; LA64-NEXT: dbar 0 +; LA64-NEXT: dbar 20 ; LA64-NEXT: ret %val = load atomic i64, ptr %ptr acquire, align 8 ret i64 %val @@ -75,13 +75,13 @@ define void @store_release_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_release_i8: ; LA32: # %bb.0: -; LA32-NEXT: dbar 0 +; LA32-NEXT: dbar 18 ; LA32-NEXT: st.b $a1, $a0, 0 ; LA32-NEXT: ret ; ; LA64-LABEL: store_release_i8: ; LA64: # %bb.0: -; LA64-NEXT: dbar 0 +; LA64-NEXT: dbar 18 ; LA64-NEXT: st.b $a1, $a0, 0 ; LA64-NEXT: ret store atomic i8 %v, ptr %ptr release, align 1 @@ -91,13 +91,13 @@ define void @store_release_i16(ptr %ptr, i16 signext %v) { ; LA32-LABEL: store_release_i16: ; LA32: # %bb.0: -; LA32-NEXT: dbar 0 +; LA32-NEXT: dbar 18 ; LA32-NEXT: st.h $a1, $a0, 0 ; LA32-NEXT: ret ; ; LA64-LABEL: store_release_i16: ; LA64: # %bb.0: -; LA64-NEXT: dbar 0 +; LA64-NEXT: dbar 18 ; LA64-NEXT: st.h $a1, $a0, 0 ; LA64-NEXT: ret store atomic i16 %v, ptr %ptr release, align 2 @@ -107,7 +107,7 @@ define void @store_release_i32(ptr %ptr, i32 signext %v) { ; LA32-LABEL: store_release_i32: ; LA32: # %bb.0: -; LA32-NEXT: dbar 0 +; LA32-NEXT: dbar 18 ; LA32-NEXT: st.w $a1, $a0, 0 ; 
LA32-NEXT: ret ;