Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3702,18 +3702,32 @@ return SDValue(); } -static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { SDLoc dl(Op); + AtomicOrdering FenceOrdering = + static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); + if (Subtarget.hasStdExtZtso()) { + // The only fence that needs an instruction is a sequentially-consistent + // cross-thread fence. + if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && + FenceSSID == SyncScope::System) { + return Op; + } + + // MEMBARRIER is a compiler barrier; it codegens to a no-op. + return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); + } + // singlethread fences only synchronize with signal handlers on the same // thread and thus only need to preserve instruction order, not actually // enforce memory ordering. if (FenceSSID == SyncScope::SingleThread) // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); - return Op; } @@ -3723,7 +3737,7 @@ default: report_fatal_error("unimplemented operand"); case ISD::ATOMIC_FENCE: - return LowerATOMIC_FENCE(Op, DAG); + return LowerATOMIC_FENCE(Op, DAG, Subtarget); case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG); case ISD::BlockAddress: @@ -13753,6 +13767,12 @@ Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const { + if (Subtarget.hasStdExtZtso()) { + if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) + return Builder.CreateFence(Ord); + return nullptr; + } + if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) return Builder.CreateFence(Ord); if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) @@ -13763,6 +13783,9 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const { + if (Subtarget.hasStdExtZtso()) + return nullptr; + if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) return Builder.CreateFence(AtomicOrdering::Acquire); return nullptr; Index: llvm/test/CodeGen/RISCV/atomic-fence.ll =================================================================== --- llvm/test/CodeGen/RISCV/atomic-fence.ll +++ llvm/test/CodeGen/RISCV/atomic-fence.ll @@ -4,19 +4,24 @@ ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ -; RUN: | FileCheck %s +; RUN: | FileCheck --check-prefix=TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ -; RUN: | FileCheck %s +; RUN: | FileCheck --check-prefix=TSO %s define void @fence_acquire() nounwind { ; CHECK-LABEL: fence_acquire: ; CHECK: # %bb.0: ; CHECK-NEXT: 
fence r, rw ; CHECK-NEXT: ret +; +; TSO-LABEL: fence_acquire: +; TSO: # %bb.0: +; TSO-NEXT: #MEMBARRIER +; TSO-NEXT: ret fence acquire ret void } @@ -26,6 +31,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: fence rw, w ; CHECK-NEXT: ret +; +; TSO-LABEL: fence_release: +; TSO: # %bb.0: +; TSO-NEXT: #MEMBARRIER +; TSO-NEXT: ret fence release ret void } @@ -35,6 +45,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: fence.tso ; CHECK-NEXT: ret +; +; TSO-LABEL: fence_acq_rel: +; TSO: # %bb.0: +; TSO-NEXT: #MEMBARRIER +; TSO-NEXT: ret fence acq_rel ret void } @@ -44,6 +59,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: fence rw, rw ; CHECK-NEXT: ret +; +; TSO-LABEL: fence_seq_cst: +; TSO: # %bb.0: +; TSO-NEXT: fence rw, rw +; TSO-NEXT: ret fence seq_cst ret void } @@ -53,6 +73,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: #MEMBARRIER ; CHECK-NEXT: ret +; +; TSO-LABEL: fence_singlethread_acquire: +; TSO: # %bb.0: +; TSO-NEXT: #MEMBARRIER +; TSO-NEXT: ret fence syncscope("singlethread") acquire ret void } @@ -62,6 +87,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: #MEMBARRIER ; CHECK-NEXT: ret +; +; TSO-LABEL: fence_singlethread_release: +; TSO: # %bb.0: +; TSO-NEXT: #MEMBARRIER +; TSO-NEXT: ret fence syncscope("singlethread") release ret void } @@ -71,6 +101,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: #MEMBARRIER ; CHECK-NEXT: ret +; +; TSO-LABEL: fence_singlethread_acq_rel: +; TSO: # %bb.0: +; TSO-NEXT: #MEMBARRIER +; TSO-NEXT: ret fence syncscope("singlethread") acq_rel ret void } @@ -80,6 +115,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: #MEMBARRIER ; CHECK-NEXT: ret +; +; TSO-LABEL: fence_singlethread_seq_cst: +; TSO: # %bb.0: +; TSO-NEXT: #MEMBARRIER +; TSO-NEXT: ret fence syncscope("singlethread") seq_cst ret void } Index: llvm/test/CodeGen/RISCV/atomic-load-store.ll =================================================================== --- llvm/test/CodeGen/RISCV/atomic-load-store.ll +++ llvm/test/CodeGen/RISCV/atomic-load-store.ll @@ -4,13 +4,13 @@ ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | 
FileCheck -check-prefix=RV32IA %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV32IA %s +; RUN: | FileCheck -check-prefix=RV32IA-TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64IA %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV64IA %s +; RUN: | FileCheck -check-prefix=RV64IA-TSO %s define i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; RV32I-LABEL: atomic_load_i8_unordered: @@ -28,6 +28,11 @@ ; RV32IA-NEXT: lb a0, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i8_unordered: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lb a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i8_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -42,6 +47,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: lb a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i8_unordered: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lb a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i8, ptr %a unordered, align 1 ret i8 %1 } @@ -62,6 +72,11 @@ ; RV32IA-NEXT: lb a0, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i8_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lb a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -76,6 +91,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: lb a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i8_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lb a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i8, ptr %a monotonic, align 1 ret i8 %1 } @@ -97,6 +117,11 @@ ; RV32IA-NEXT: fence r, rw ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i8_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lb a0, 0(a0) +; 
RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i8_acquire: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -112,6 +137,11 @@ ; RV64IA-NEXT: lb a0, 0(a0) ; RV64IA-NEXT: fence r, rw ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i8_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lb a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i8, ptr %a acquire, align 1 ret i8 %1 } @@ -134,6 +164,12 @@ ; RV32IA-NEXT: fence r, rw ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lb a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i8_seq_cst: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -150,6 +186,12 @@ ; RV64IA-NEXT: lb a0, 0(a0) ; RV64IA-NEXT: fence r, rw ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lb a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i8, ptr %a seq_cst, align 1 ret i8 %1 } @@ -170,6 +212,11 @@ ; RV32IA-NEXT: lh a0, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i16_unordered: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lh a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i16_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -184,6 +231,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: lh a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i16_unordered: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lh a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i16, ptr %a unordered, align 2 ret i16 %1 } @@ -204,6 +256,11 @@ ; RV32IA-NEXT: lh a0, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i16_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lh a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -218,6 +275,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: lh a0, 0(a0) ; RV64IA-NEXT: ret +; +; 
RV64IA-TSO-LABEL: atomic_load_i16_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lh a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i16, ptr %a monotonic, align 2 ret i16 %1 } @@ -239,6 +301,11 @@ ; RV32IA-NEXT: fence r, rw ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i16_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lh a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i16_acquire: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -254,6 +321,11 @@ ; RV64IA-NEXT: lh a0, 0(a0) ; RV64IA-NEXT: fence r, rw ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i16_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lh a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i16, ptr %a acquire, align 2 ret i16 %1 } @@ -276,6 +348,12 @@ ; RV32IA-NEXT: fence r, rw ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lh a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i16_seq_cst: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -292,6 +370,12 @@ ; RV64IA-NEXT: lh a0, 0(a0) ; RV64IA-NEXT: fence r, rw ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lh a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i16, ptr %a seq_cst, align 2 ret i16 %1 } @@ -312,6 +396,11 @@ ; RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i32_unordered: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i32_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -326,6 +415,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: lw a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i32_unordered: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i32, ptr %a unordered, align 4 ret i32 %1 } @@ -346,6 +440,11 @@ ; 
RV32IA-NEXT: lw a0, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i32_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -360,6 +459,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: lw a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i32_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i32, ptr %a monotonic, align 4 ret i32 %1 } @@ -381,6 +485,11 @@ ; RV32IA-NEXT: fence r, rw ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i32_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i32_acquire: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -396,6 +505,11 @@ ; RV64IA-NEXT: lw a0, 0(a0) ; RV64IA-NEXT: fence r, rw ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i32_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i32, ptr %a acquire, align 4 ret i32 %1 } @@ -418,6 +532,12 @@ ; RV32IA-NEXT: fence r, rw ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i32_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i32_seq_cst: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -434,6 +554,12 @@ ; RV64IA-NEXT: lw a0, 0(a0) ; RV64IA-NEXT: fence r, rw ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i32, ptr %a seq_cst, align 4 ret i32 %1 } @@ -459,6 +585,16 @@ ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i64_unordered: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill 
+; RV32IA-TSO-NEXT: li a1, 0 +; RV32IA-TSO-NEXT: call __atomic_load_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i64_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -473,6 +609,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: ld a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i64_unordered: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i64, ptr %a unordered, align 8 ret i64 %1 } @@ -498,6 +639,16 @@ ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i64_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: li a1, 0 +; RV32IA-TSO-NEXT: call __atomic_load_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -512,6 +663,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: ld a0, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i64_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i64, ptr %a monotonic, align 8 ret i64 %1 } @@ -537,6 +693,16 @@ ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i64_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: li a1, 2 +; RV32IA-TSO-NEXT: call __atomic_load_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i64_acquire: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -552,6 +718,11 @@ ; RV64IA-NEXT: ld a0, 0(a0) ; RV64IA-NEXT: fence r, rw ; RV64IA-NEXT: ret +; +; 
RV64IA-TSO-LABEL: atomic_load_i64_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i64, ptr %a acquire, align 8 ret i64 %1 } @@ -577,6 +748,16 @@ ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_load_i64_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: li a1, 5 +; RV32IA-TSO-NEXT: call __atomic_load_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_load_i64_seq_cst: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -593,6 +774,12 @@ ; RV64IA-NEXT: ld a0, 0(a0) ; RV64IA-NEXT: fence r, rw ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i64_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: ret %1 = load atomic i64, ptr %a seq_cst, align 8 ret i64 %1 } @@ -613,6 +800,11 @@ ; RV32IA-NEXT: sb a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i8_unordered: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sb a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i8_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -627,6 +819,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: sb a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i8_unordered: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sb a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i8 %b, ptr %a unordered, align 1 ret void } @@ -647,6 +844,11 @@ ; RV32IA-NEXT: sb a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i8_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sb a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i8_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -661,6 +863,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: sb a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: 
atomic_store_i8_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sb a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i8 %b, ptr %a monotonic, align 1 ret void } @@ -682,6 +889,11 @@ ; RV32IA-NEXT: sb a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i8_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sb a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i8_release: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -697,6 +909,11 @@ ; RV64IA-NEXT: fence rw, w ; RV64IA-NEXT: sb a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i8_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sb a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i8 %b, ptr %a release, align 1 ret void } @@ -718,6 +935,11 @@ ; RV32IA-NEXT: sb a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i8_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sb a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i8_seq_cst: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -733,6 +955,11 @@ ; RV64IA-NEXT: fence rw, w ; RV64IA-NEXT: sb a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i8_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sb a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i8 %b, ptr %a seq_cst, align 1 ret void } @@ -753,6 +980,11 @@ ; RV32IA-NEXT: sh a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i16_unordered: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sh a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i16_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -767,6 +999,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: sh a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i16_unordered: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sh a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i16 %b, ptr %a unordered, align 2 ret void } @@ -787,6 +1024,11 @@ ; RV32IA-NEXT: sh a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i16_monotonic: +; 
RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sh a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i16_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -801,6 +1043,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: sh a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i16_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sh a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i16 %b, ptr %a monotonic, align 2 ret void } @@ -822,6 +1069,11 @@ ; RV32IA-NEXT: sh a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i16_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sh a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i16_release: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -837,6 +1089,11 @@ ; RV64IA-NEXT: fence rw, w ; RV64IA-NEXT: sh a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i16_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sh a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i16 %b, ptr %a release, align 2 ret void } @@ -858,6 +1115,11 @@ ; RV32IA-NEXT: sh a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i16_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sh a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i16_seq_cst: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -873,6 +1135,11 @@ ; RV64IA-NEXT: fence rw, w ; RV64IA-NEXT: sh a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i16_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sh a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i16 %b, ptr %a seq_cst, align 2 ret void } @@ -893,6 +1160,11 @@ ; RV32IA-NEXT: sw a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i32_unordered: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i32_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -907,6 +1179,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: sw a1, 0(a0) ; RV64IA-NEXT: ret +; +; 
RV64IA-TSO-LABEL: atomic_store_i32_unordered: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i32 %b, ptr %a unordered, align 4 ret void } @@ -927,6 +1204,11 @@ ; RV32IA-NEXT: sw a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i32_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i32_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -941,6 +1223,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: sw a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i32_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i32 %b, ptr %a monotonic, align 4 ret void } @@ -962,6 +1249,11 @@ ; RV32IA-NEXT: sw a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i32_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i32_release: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -977,6 +1269,11 @@ ; RV64IA-NEXT: fence rw, w ; RV64IA-NEXT: sw a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i32_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i32 %b, ptr %a release, align 4 ret void } @@ -998,6 +1295,11 @@ ; RV32IA-NEXT: sw a1, 0(a0) ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i32_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i32_seq_cst: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1013,6 +1315,11 @@ ; RV64IA-NEXT: fence rw, w ; RV64IA-NEXT: sw a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i32_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i32 %b, ptr %a seq_cst, align 4 ret void } @@ -1038,6 +1345,16 @@ ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; 
RV32IA-TSO-LABEL: atomic_store_i64_unordered: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: li a3, 0 +; RV32IA-TSO-NEXT: call __atomic_store_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i64_unordered: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1052,6 +1369,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: sd a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i64_unordered: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i64 %b, ptr %a unordered, align 8 ret void } @@ -1077,6 +1399,16 @@ ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i64_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: li a3, 0 +; RV32IA-TSO-NEXT: call __atomic_store_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i64_monotonic: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1091,6 +1423,11 @@ ; RV64IA: # %bb.0: ; RV64IA-NEXT: sd a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i64_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i64 %b, ptr %a monotonic, align 8 ret void } @@ -1116,6 +1453,16 @@ ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i64_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: li a3, 3 +; RV32IA-TSO-NEXT: call __atomic_store_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: 
atomic_store_i64_release: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1131,6 +1478,11 @@ ; RV64IA-NEXT: fence rw, w ; RV64IA-NEXT: sd a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i64_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i64 %b, ptr %a release, align 8 ret void } @@ -1156,6 +1508,16 @@ ; RV32IA-NEXT: addi sp, sp, 16 ; RV32IA-NEXT: ret ; +; RV32IA-TSO-LABEL: atomic_store_i64_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: addi sp, sp, -16 +; RV32IA-TSO-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-TSO-NEXT: li a3, 5 +; RV32IA-TSO-NEXT: call __atomic_store_8@plt +; RV32IA-TSO-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-TSO-NEXT: addi sp, sp, 16 +; RV32IA-TSO-NEXT: ret +; ; RV64I-LABEL: atomic_store_i64_seq_cst: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 @@ -1171,6 +1533,11 @@ ; RV64IA-NEXT: fence rw, w ; RV64IA-NEXT: sd a1, 0(a0) ; RV64IA-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_i64_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: ret store atomic i64 %b, ptr %a seq_cst, align 8 ret void }