diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -519,9 +519,8 @@
   bool preferZeroCompareBranch() const override { return true; }
 
-  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
-    return isa<LoadInst>(I) || isa<StoreInst>(I);
-  }
+  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
+
   Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                 AtomicOrdering Ord) const override;
   Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
@@ -599,6 +598,9 @@
   bool isMulAddWithConstProfitable(SDValue AddNode,
                                    SDValue ConstNode) const override;
 
+  TargetLoweringBase::AtomicExpansionKind
+  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+
   TargetLowering::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
   Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9594,6 +9594,14 @@
   // Returns condition code of comparison operation.
   ISD::CondCode getCondCode() const { return CCode; }
 };
+
+// Returns true for seq_cst stores of 32 or 64 bits.
+bool canAmoSwapStoreInst(const StoreInst *SI) {
+  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+  return SI->getOrdering() == AtomicOrdering::SequentiallyConsistent &&
+         (Size == 32 || Size == 64);
+}
+
 } // namespace
 
 // Verifies conditions to apply an optimization.
@@ -15026,11 +15034,39 @@
   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
     return Builder.CreateFence(AtomicOrdering::Acquire);
-  if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
-    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
+
+  if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) {
+    // We only use the trailing fence for s{b,h}.
+    // Table A.6 prescribes AMOSWAP for s{w,d}; for s{b,h} we emit:
+    //   fence rw, w
+    //   s{b,h}
+    //   fence rw, rw
+    auto *SI = cast<StoreInst>(Inst);
+    unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+    if (Size == 8 || Size == 16)
+      return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
+  }
   return nullptr;
 }
 
+bool RISCVTargetLowering::shouldInsertFencesForAtomic(
+    const Instruction *I) const {
+  if (isa<LoadInst>(I))
+    return true;
+  if (auto *SI = dyn_cast<StoreInst>(I))
+    return !canAmoSwapStoreInst(SI);
+  return false;
+}
+
+TargetLoweringBase::AtomicExpansionKind
+RISCVTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+  if (canAmoSwapStoreInst(SI)) {
+    SI->setOrdering(AtomicOrdering::Release);
+    return AtomicExpansionKind::Expand;
+  }
+  return AtomicExpansionKind::None;
+}
+
 TargetLowering::AtomicExpansionKind
 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/atomic-load-store.ll
--- a/llvm/test/CodeGen/RISCV/atomic-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-load-store.ll
@@ -1123,17 +1123,10 @@
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomic_store_i32_seq_cst:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    fence rw, w
-; RV32IA-WMO-NEXT:    sw a1, 0(a0)
-; RV32IA-WMO-NEXT:    fence rw, rw
-; RV32IA-WMO-NEXT:    ret
-;
-; RV32IA-TSO-LABEL: atomic_store_i32_seq_cst:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    sw a1, 0(a0)
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-LABEL: atomic_store_i32_seq_cst:
+; RV32IA:       # %bb.0:
+; RV32IA-NEXT:    amoswap.w.rl a0, a1, (a0)
+; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomic_store_i32_seq_cst:
 ; RV64I:       # %bb.0:
@@ -1145,17 +1138,10 @@
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-WMO-LABEL: atomic_store_i32_seq_cst:
-; RV64IA-WMO:       # %bb.0:
-; RV64IA-WMO-NEXT:    fence rw, w
-; RV64IA-WMO-NEXT:    sw a1, 0(a0)
-; RV64IA-WMO-NEXT:    fence rw, rw
-; RV64IA-WMO-NEXT:    ret
-;
-; RV64IA-TSO-LABEL: atomic_store_i32_seq_cst:
-; RV64IA-TSO:       # %bb.0:
-; RV64IA-TSO-NEXT:    sw a1, 0(a0)
-; RV64IA-TSO-NEXT:    ret
+; RV64IA-LABEL: atomic_store_i32_seq_cst:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    amoswap.w.rl a0, a1, (a0)
+; RV64IA-NEXT:    ret
   store atomic i32 %b, ptr %a seq_cst, align 4
   ret void
 }
@@ -1314,17 +1300,10 @@
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
-; RV64IA-WMO-LABEL: atomic_store_i64_seq_cst:
-; RV64IA-WMO:       # %bb.0:
-; RV64IA-WMO-NEXT:    fence rw, w
-; RV64IA-WMO-NEXT:    sd a1, 0(a0)
-; RV64IA-WMO-NEXT:    fence rw, rw
-; RV64IA-WMO-NEXT:    ret
-;
-; RV64IA-TSO-LABEL: atomic_store_i64_seq_cst:
-; RV64IA-TSO:       # %bb.0:
-; RV64IA-TSO-NEXT:    sd a1, 0(a0)
-; RV64IA-TSO-NEXT:    ret
+; RV64IA-LABEL: atomic_store_i64_seq_cst:
+; RV64IA:       # %bb.0:
+; RV64IA-NEXT:    amoswap.d.rl a0, a1, (a0)
+; RV64IA-NEXT:    ret
   store atomic i64 %b, ptr %a seq_cst, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll
--- a/llvm/test/CodeGen/RISCV/forced-atomics.ll
+++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -632,9 +632,12 @@
 ;
 ; RV32-ATOMIC-LABEL: store32_seq_cst:
 ; RV32-ATOMIC:       # %bb.0:
-; RV32-ATOMIC-NEXT:    fence rw, w
-; RV32-ATOMIC-NEXT:    sw zero, 0(a0)
-; RV32-ATOMIC-NEXT:    fence rw, rw
+; RV32-ATOMIC-NEXT:    addi sp, sp, -16
+; RV32-ATOMIC-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-ATOMIC-NEXT:    li a1, 0
+; RV32-ATOMIC-NEXT:    call __sync_lock_test_and_set_4@plt
+; RV32-ATOMIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV32-ATOMIC-NEXT:    ret
 ;
 ; RV64-NO-ATOMIC-LABEL: store32_seq_cst:
@@ -650,9 +653,12 @@
 ;
 ; RV64-ATOMIC-LABEL: store32_seq_cst:
 ; RV64-ATOMIC:       # %bb.0:
-; RV64-ATOMIC-NEXT:    fence rw, w
-; RV64-ATOMIC-NEXT:    sw zero, 0(a0)
-; RV64-ATOMIC-NEXT:    fence rw, rw
+; RV64-ATOMIC-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT:    li a1, 0
+; RV64-ATOMIC-NEXT:    call __sync_lock_test_and_set_4@plt
+; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
   store atomic i32 0, ptr %p seq_cst, align 4
   ret void
@@ -2225,9 +2231,12 @@
 ;
 ; RV64-ATOMIC-LABEL: store64:
 ; RV64-ATOMIC:       # %bb.0:
-; RV64-ATOMIC-NEXT:    fence rw, w
-; RV64-ATOMIC-NEXT:    sd zero, 0(a0)
-; RV64-ATOMIC-NEXT:    fence rw, rw
+; RV64-ATOMIC-NEXT:    addi sp, sp, -16
+; RV64-ATOMIC-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-ATOMIC-NEXT:    li a1, 0
+; RV64-ATOMIC-NEXT:    call __sync_lock_test_and_set_8@plt
+; RV64-ATOMIC-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-ATOMIC-NEXT:    addi sp, sp, 16
 ; RV64-ATOMIC-NEXT:    ret
   store atomic i64 0, ptr %p seq_cst, align 8
   ret void
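
A minimal reproduction of the new lowering, assuming the usual RUN setup for these tests (llc with -mtriple=riscv64 -mattr=+a); the function name below is illustrative only, not part of the patch:

  define void @example_store_seq_cst(ptr %p, i32 %v) {
    store atomic i32 %v, ptr %p seq_cst, align 4
    ret void
  }

With this change, the seq_cst i32 store above is expected to lower to a single amoswap.w.rl (as in the updated atomic-load-store.ll checks) instead of the fence rw,w / sw / fence rw,rw sequence, while i8/i16 seq_cst stores keep the leading and trailing fences.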