diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -34,6 +34,7 @@
   RISCVMachineFunctionInfo.cpp
   RISCVMacroFusion.cpp
   RISCVMergeBaseOffset.cpp
+  RISCVOptAMOInstrs.cpp
   RISCVOptWInstrs.cpp
   RISCVRedundantCopyElimination.cpp
   RISCVMoveMerger.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -42,6 +42,10 @@
 FunctionPass *createRISCVGatherScatterLoweringPass();
 void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
 
+FunctionPass *createRISCVOptAMOInstrsPass();
+void initializeRISCVOptAMOInstrsPass(PassRegistry &);
+extern char &RISCVOptAMOInstrsID;
+
 FunctionPass *createRISCVOptWInstrsPass();
 void initializeRISCVOptWInstrsPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/RISCV/RISCVOptAMOInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptAMOInstrs.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVOptAMOInstrs.cpp
@@ -0,0 +1,110 @@
+//===- RISCVOptAMOInstrs.cpp - MI AMO instruction optimizations ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass rewrites rd to x0 for AMO instructions whose return values are
+// unused.
+//
+//===---------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "riscv-opt-amo-instrs"
+#define RISCV_OPT_AMO_INSTRS_NAME "RISC-V Optimize AMO Instructions"
+
+STATISTIC(NumRewrittenAMOInstrs, "Number of rewritten AMO instrs");
+
+static cl::opt<bool>
+    DisableAMORdDiscard("riscv-disable-amo-rd-discard",
+                        cl::desc("Disable rewriting of rd for AMO instrs whose "
+                                 "return values are unused"),
+                        cl::init(false), cl::Hidden);
+
+namespace {
+class RISCVOptAMOInstrs : public MachineFunctionPass {
+public:
+  static char ID;
+
+  RISCVOptAMOInstrs() : MachineFunctionPass(ID) {
+    initializeRISCVOptAMOInstrsPass(*PassRegistry::getPassRegistry());
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_OPT_AMO_INSTRS_NAME; }
+};
+} // end anonymous namespace
+
+char RISCVOptAMOInstrs::ID = 0;
+INITIALIZE_PASS(RISCVOptAMOInstrs, DEBUG_TYPE, RISCV_OPT_AMO_INSTRS_NAME, false,
+                false)
+char &llvm::RISCVOptAMOInstrsID = RISCVOptAMOInstrs::ID;
+
+FunctionPass *llvm::createRISCVOptAMOInstrsPass() {
+  return new RISCVOptAMOInstrs();
+}
+
+static bool isAMOInstr(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+#define RISCV_HANDLE_AMO_INSTRS_FOR_EACH_ORDERING(NAME)                        \
+  case RISCV::NAME:                                                            \
+  case RISCV::NAME##_AQ:                                                       \
+  case RISCV::NAME##_RL:                                                       \
+  case RISCV::NAME##_AQ_RL:
+#define RISCV_HANDLE_AMO_INSTRS(NAME)                                          \
+  RISCV_HANDLE_AMO_INSTRS_FOR_EACH_ORDERING(NAME##_W)                          \
+  RISCV_HANDLE_AMO_INSTRS_FOR_EACH_ORDERING(NAME##_D)
+  RISCV_HANDLE_AMO_INSTRS(AMOSWAP)
+  RISCV_HANDLE_AMO_INSTRS(AMOADD)
+  RISCV_HANDLE_AMO_INSTRS(AMOAND)
+  RISCV_HANDLE_AMO_INSTRS(AMOOR)
+  RISCV_HANDLE_AMO_INSTRS(AMOXOR)
+  RISCV_HANDLE_AMO_INSTRS(AMOMAX)
+  RISCV_HANDLE_AMO_INSTRS(AMOMAXU)
+  RISCV_HANDLE_AMO_INSTRS(AMOMIN)
+  RISCV_HANDLE_AMO_INSTRS(AMOMINU)
+#undef RISCV_HANDLE_AMO_INSTRS
+#undef RISCV_HANDLE_AMO_INSTRS_FOR_EACH_ORDERING
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool RISCVOptAMOInstrs::runOnMachineFunction(MachineFunction &MF) {
+  if (DisableAMORdDiscard)
+    return false;
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  if (!ST.hasStdExtA())
+    return false;
+
+  bool MadeChange = false;
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      if (!isAMOInstr(MI))
+        continue;
+      MachineOperand &DstReg = MI.getOperand(0);
+      if (DstReg.isReg() && DstReg.isDead() && DstReg.getReg() != RISCV::X0) {
+        DstReg.setReg(RISCV::X0);
+        ++NumRewrittenAMOInstrs;
+        MadeChange = true;
+      }
+    }
+  }
+
+  return MadeChange;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -81,6 +81,7 @@
   initializeRISCVGatherScatterLoweringPass(*PR);
   initializeRISCVCodeGenPreparePass(*PR);
   initializeRISCVMergeBaseOffsetOptPass(*PR);
+  initializeRISCVOptAMOInstrsPass(*PR);
   initializeRISCVOptWInstrsPass(*PR);
   initializeRISCVPreRAExpandPseudoPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
@@ -394,7 +395,7 @@
 
 void RISCVPassConfig::addOptimizedRegAlloc() {
   insertPass(&DetectDeadLanesID, &RISCVInitUndefID);
-
+  insertPass(&DetectDeadLanesID, &RISCVOptAMOInstrsID);
   TargetPassConfig::addOptimizedRegAlloc();
 }
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -111,6 +111,7 @@
 ; CHECK-NEXT:       RISC-V Insert Read/Write CSR Pass
 ; CHECK-NEXT:       Detect Dead Lanes
 ; CHECK-NEXT:       RISC-V init undef pass
+; CHECK-NEXT:       RISC-V Optimize AMO Instructions
 ; CHECK-NEXT:       Process Implicit Definitions
 ; CHECK-NEXT:       Remove unreachable machine basic blocks
 ; CHECK-NEXT:       Live Variable Analysis
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
@@ -0,0 +1,474 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32 %s
+; RUN: llc -O3 -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64 %s
+
+define void @amoswap_w_discard(ptr %a, i32 %b) nounwind {
+; RV32-LABEL: amoswap_w_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    amoswap.w.aqrl zero, a1, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amoswap_w_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amoswap.w.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw xchg ptr %a, i32 %b seq_cst
+  ret void
+}
+
+define void @amoswap_d_discard(ptr %a, i64 %b) nounwind {
+; RV32-LABEL: amoswap_d_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    li a3, 5
+; RV32-NEXT:    call __atomic_exchange_8@plt
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amoswap_d_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amoswap.d.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw xchg ptr %a, i64 %b seq_cst
+  ret void
+}
+
+define void @amoadd_w_discard(ptr %a, i32 %b) nounwind {
+; RV32-LABEL: amoadd_w_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    amoadd.w.aqrl zero, a1, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amoadd_w_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amoadd.w.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw add ptr %a, i32 %b seq_cst
+  ret void
+}
+
+define void @amoadd_d_discard(ptr %a, i64 %b) nounwind {
+; RV32-LABEL: amoadd_d_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    li a3, 5
+; RV32-NEXT:    call __atomic_fetch_add_8@plt
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amoadd_d_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amoadd.d.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw add ptr %a, i64 %b seq_cst
+  ret void
+}
+
+define void @amoand_w_discard(ptr %a, i32 %b) nounwind {
+; RV32-LABEL: amoand_w_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    amoand.w.aqrl zero, a1, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amoand_w_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amoand.w.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw and ptr %a, i32 %b seq_cst
+  ret void
+}
+
+define void @amoand_d_discard(ptr %a, i64 %b) nounwind {
+; RV32-LABEL: amoand_d_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    li a3, 5
+; RV32-NEXT:    call __atomic_fetch_and_8@plt
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amoand_d_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amoand.d.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw and ptr %a, i64 %b seq_cst
+  ret void
+}
+
+define void @amoor_w_discard(ptr %a, i32 %b) nounwind {
+; RV32-LABEL: amoor_w_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    amoor.w.aqrl zero, a1, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amoor_w_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amoor.w.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw or ptr %a, i32 %b seq_cst
+  ret void
+}
+
+define void @amoor_d_discard(ptr %a, i64 %b) nounwind {
+; RV32-LABEL: amoor_d_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    li a3, 5
+; RV32-NEXT:    call __atomic_fetch_or_8@plt
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amoor_d_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amoor.d.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw or ptr %a, i64 %b seq_cst
+  ret void
+}
+
+define void @amoxor_w_discard(ptr %a, i32 %b) nounwind {
+; RV32-LABEL: amoxor_w_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    amoxor.w.aqrl zero, a1, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amoxor_w_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amoxor.w.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw xor ptr %a, i32 %b seq_cst
+  ret void
+}
+
+define void @amoxor_d_discard(ptr %a, i64 %b) nounwind {
+; RV32-LABEL: amoxor_d_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    li a3, 5
+; RV32-NEXT:    call __atomic_fetch_xor_8@plt
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amoxor_d_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amoxor.d.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw xor ptr %a, i64 %b seq_cst
+  ret void
+}
+
+define void @amomax_w_discard(ptr %a, i32 %b) nounwind {
+; RV32-LABEL: amomax_w_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    amomax.w.aqrl zero, a1, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amomax_w_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amomax.w.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw max ptr %a, i32 %b seq_cst
+  ret void
+}
+
+define void @amomax_d_discard(ptr %a, i64 %b) nounwind {
+; RV32-LABEL: amomax_d_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    lw a4, 4(a0)
+; RV32-NEXT:    lw a5, 0(a0)
+; RV32-NEXT:    mv s1, a2
+; RV32-NEXT:    mv s2, a1
+; RV32-NEXT:    j .LBB11_2
+; RV32-NEXT:  .LBB11_1: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB11_2 Depth=1
+; RV32-NEXT:    sw a5, 8(sp)
+; RV32-NEXT:    sw a4, 12(sp)
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    li a4, 5
+; RV32-NEXT:    li a5, 5
+; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    call __atomic_compare_exchange_8@plt
+; RV32-NEXT:    lw a4, 12(sp)
+; RV32-NEXT:    lw a5, 8(sp)
+; RV32-NEXT:    bnez a0, .LBB11_6
+; RV32-NEXT:  .LBB11_2: # %atomicrmw.start
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    beq a4, s1, .LBB11_4
+; RV32-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB11_2 Depth=1
+; RV32-NEXT:    slt a0, s1, a4
+; RV32-NEXT:    mv a2, a5
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:    bnez a0, .LBB11_1
+; RV32-NEXT:    j .LBB11_5
+; RV32-NEXT:  .LBB11_4: # in Loop: Header=BB11_2 Depth=1
+; RV32-NEXT:    sltu a0, s2, a5
+; RV32-NEXT:    mv a2, a5
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:    bnez a0, .LBB11_1
+; RV32-NEXT:  .LBB11_5: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB11_2 Depth=1
+; RV32-NEXT:    mv a2, s2
+; RV32-NEXT:    mv a3, s1
+; RV32-NEXT:    j .LBB11_1
+; RV32-NEXT:  .LBB11_6: # %atomicrmw.end
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amomax_d_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amomax.d.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw max ptr %a, i64 %b seq_cst
+  ret void
+}
+
+define void @amomaxu_w_discard(ptr %a, i32 %b) nounwind {
+; RV32-LABEL: amomaxu_w_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    amomaxu.w.aqrl zero, a1, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amomaxu_w_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amomaxu.w.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw umax ptr %a, i32 %b seq_cst
+  ret void
+}
+
+define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind {
+; RV32-LABEL: amomaxu_d_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    lw a4, 4(a0)
+; RV32-NEXT:    lw a5, 0(a0)
+; RV32-NEXT:    mv s1, a2
+; RV32-NEXT:    mv s2, a1
+; RV32-NEXT:    j .LBB13_2
+; RV32-NEXT:  .LBB13_1: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB13_2 Depth=1
+; RV32-NEXT:    sw a5, 8(sp)
+; RV32-NEXT:    sw a4, 12(sp)
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    li a4, 5
+; RV32-NEXT:    li a5, 5
+; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    call __atomic_compare_exchange_8@plt
+; RV32-NEXT:    lw a4, 12(sp)
+; RV32-NEXT:    lw a5, 8(sp)
+; RV32-NEXT:    bnez a0, .LBB13_6
+; RV32-NEXT:  .LBB13_2: # %atomicrmw.start
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    beq a4, s1, .LBB13_4
+; RV32-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB13_2 Depth=1
+; RV32-NEXT:    sltu a0, s1, a4
+; RV32-NEXT:    mv a2, a5
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:    bnez a0, .LBB13_1
+; RV32-NEXT:    j .LBB13_5
+; RV32-NEXT:  .LBB13_4: # in Loop: Header=BB13_2 Depth=1
+; RV32-NEXT:    sltu a0, s2, a5
+; RV32-NEXT:    mv a2, a5
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:    bnez a0, .LBB13_1
+; RV32-NEXT:  .LBB13_5: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB13_2 Depth=1
+; RV32-NEXT:    mv a2, s2
+; RV32-NEXT:    mv a3, s1
+; RV32-NEXT:    j .LBB13_1
+; RV32-NEXT:  .LBB13_6: # %atomicrmw.end
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amomaxu_d_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amomaxu.d.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw umax ptr %a, i64 %b seq_cst
+  ret void
+}
+
+define void @amomin_w_discard(ptr %a, i32 %b) nounwind {
+; RV32-LABEL: amomin_w_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    amomin.w.aqrl zero, a1, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amomin_w_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amomin.w.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw min ptr %a, i32 %b seq_cst
+  ret void
+}
+
+define void @amomin_d_discard(ptr %a, i64 %b) nounwind {
+; RV32-LABEL: amomin_d_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    lw a4, 4(a0)
+; RV32-NEXT:    lw a5, 0(a0)
+; RV32-NEXT:    mv s1, a2
+; RV32-NEXT:    mv s2, a1
+; RV32-NEXT:    j .LBB15_2
+; RV32-NEXT:  .LBB15_1: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB15_2 Depth=1
+; RV32-NEXT:    sw a5, 8(sp)
+; RV32-NEXT:    sw a4, 12(sp)
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    li a4, 5
+; RV32-NEXT:    li a5, 5
+; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    call __atomic_compare_exchange_8@plt
+; RV32-NEXT:    lw a4, 12(sp)
+; RV32-NEXT:    lw a5, 8(sp)
+; RV32-NEXT:    bnez a0, .LBB15_6
+; RV32-NEXT:  .LBB15_2: # %atomicrmw.start
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    beq a4, s1, .LBB15_4
+; RV32-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB15_2 Depth=1
+; RV32-NEXT:    slt a0, s1, a4
+; RV32-NEXT:    mv a2, a5
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:    beqz a0, .LBB15_1
+; RV32-NEXT:    j .LBB15_5
+; RV32-NEXT:  .LBB15_4: # in Loop: Header=BB15_2 Depth=1
+; RV32-NEXT:    sltu a0, s2, a5
+; RV32-NEXT:    mv a2, a5
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:    beqz a0, .LBB15_1
+; RV32-NEXT:  .LBB15_5: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB15_2 Depth=1
+; RV32-NEXT:    mv a2, s2
+; RV32-NEXT:    mv a3, s1
+; RV32-NEXT:    j .LBB15_1
+; RV32-NEXT:  .LBB15_6: # %atomicrmw.end
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amomin_d_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amomin.d.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw min ptr %a, i64 %b seq_cst
+  ret void
+}
+
+define void @amominu_w_discard(ptr %a, i32 %b) nounwind {
+; RV32-LABEL: amominu_w_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    amominu.w.aqrl zero, a1, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amominu_w_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amominu.w.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw umin ptr %a, i32 %b seq_cst
+  ret void
+}
+
+define void @amominu_d_discard(ptr %a, i64 %b) nounwind {
+; RV32-LABEL: amominu_d_discard:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    lw a4, 4(a0)
+; RV32-NEXT:    lw a5, 0(a0)
+; RV32-NEXT:    mv s1, a2
+; RV32-NEXT:    mv s2, a1
+; RV32-NEXT:    j .LBB17_2
+; RV32-NEXT:  .LBB17_1: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB17_2 Depth=1
+; RV32-NEXT:    sw a5, 8(sp)
+; RV32-NEXT:    sw a4, 12(sp)
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    li a4, 5
+; RV32-NEXT:    li a5, 5
+; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    call __atomic_compare_exchange_8@plt
+; RV32-NEXT:    lw a4, 12(sp)
+; RV32-NEXT:    lw a5, 8(sp)
+; RV32-NEXT:    bnez a0, .LBB17_6
+; RV32-NEXT:  .LBB17_2: # %atomicrmw.start
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    beq a4, s1, .LBB17_4
+; RV32-NEXT:  # %bb.3: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB17_2 Depth=1
+; RV32-NEXT:    sltu a0, s1, a4
+; RV32-NEXT:    mv a2, a5
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:    beqz a0, .LBB17_1
+; RV32-NEXT:    j .LBB17_5
+; RV32-NEXT:  .LBB17_4: # in Loop: Header=BB17_2 Depth=1
+; RV32-NEXT:    sltu a0, s2, a5
+; RV32-NEXT:    mv a2, a5
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:    beqz a0, .LBB17_1
+; RV32-NEXT:  .LBB17_5: # %atomicrmw.start
+; RV32-NEXT:    # in Loop: Header=BB17_2 Depth=1
+; RV32-NEXT:    mv a2, s2
+; RV32-NEXT:    mv a3, s1
+; RV32-NEXT:    j .LBB17_1
+; RV32-NEXT:  .LBB17_6: # %atomicrmw.end
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amominu_d_discard:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amominu.d.aqrl zero, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw umin ptr %a, i64 %b seq_cst
+  ret void
+}
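+
+; A sketch of an extra negative test, not part of the original patch and not
+; autogenerated by update_llc_test_checks.py: when the atomicrmw result is
+; actually used, the pass must leave rd intact. The destination register a0
+; below is an assumption based on the standard calling convention.
+define i32 @amoadd_w_used(ptr %a, i32 %b) nounwind {
+; RV32-LABEL: amoadd_w_used:
+; RV32:       # %bb.0:
+; RV32-NEXT:    amoadd.w.aqrl a0, a1, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: amoadd_w_used:
+; RV64:       # %bb.0:
+; RV64-NEXT:    amoadd.w.aqrl a0, a1, (a0)
+; RV64-NEXT:    ret
+  %1 = atomicrmw add ptr %a, i32 %b seq_cst
+  ret i32 %1
+}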