diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -36,6 +36,7 @@
   RISCVMergeBaseOffset.cpp
   RISCVOptWInstrs.cpp
   RISCVRedundantCopyElimination.cpp
+  RISCVPushPopOptimizer.cpp
   RISCVRegisterInfo.cpp
   RISCVRVVInitUndef.cpp
   RISCVSubtarget.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -72,6 +72,9 @@
 void initializeRISCVInitUndefPass(PassRegistry &);
 extern char &RISCVInitUndefID;
 
+FunctionPass *createRISCVPushPopOptimizationPass();
+void initializeRISCVPushPopOptPass(PassRegistry &);
+
 InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
                                                     RISCVSubtarget &,
                                                     RISCVRegisterBankInfo &);
diff --git a/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
@@ -0,0 +1,164 @@
+//===------- RISCVPushPopOptimizer.cpp - RISCV Push/Pop opt. pass ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that modifies PUSH/POP instructions from the Zcmp
+// extension to use their non prolog/epilog related functionalities
+// and generates POPRET instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define RISCV_PUSH_POP_OPT_NAME "RISC-V Zcmp Push/Pop optimization pass"
+
+namespace {
+/// Folds a CM_POP that is directly followed by a PseudoRET into CM_POPRET, or
+/// into CM_POPRETZ when a copy of X0 into a0 (X10) precedes the POP with no
+/// intervening use or def of a0.
+struct RISCVPushPopOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVPushPopOpt() : MachineFunctionPass(ID) {
+    initializeRISCVPushPopOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  // Set by runOnMachineFunction before any basic block is processed.
+  const RISCVInstrInfo *TII = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+
+  // Track which register units have been modified and used.
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+  bool usePopRet(MachineBasicBlock::iterator &MBBI,
+                 MachineBasicBlock::iterator &NextI, bool IsReturnZero);
+  bool adjustRetVal(MachineBasicBlock::iterator &MBBI);
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  StringRef getPassName() const override { return RISCV_PUSH_POP_OPT_NAME; }
+};
+
+char RISCVPushPopOpt::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVPushPopOpt, "riscv-push-pop-opt", RISCV_PUSH_POP_OPT_NAME,
+                false, false)
+
+// Check if POP instruction was inserted into the MBB and return iterator to it.
+// Returns MBB.end() when the block contains no CM_POP.
+static MachineBasicBlock::iterator containsPop(MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBB.end();
+       MBBI = next_nodbg(MBBI, MBB.end()))
+    if (MBBI->getOpcode() == RISCV::CM_POP)
+      return MBBI;
+
+  return MBB.end();
+}
+
+// Replace the POP at MBBI and the return at NextI with a single CM_POPRET, or
+// CM_POPRETZ when IsReturnZero is set. Both original instructions are erased,
+// so neither iterator may be used afterwards. Always returns true.
+bool RISCVPushPopOpt::usePopRet(MachineBasicBlock::iterator &MBBI,
+                                MachineBasicBlock::iterator &NextI,
+                                bool IsReturnZero) {
+  // Since Pseudo instruction lowering happen later in the pipeline,
+  // this will detect all ret instruction.
+  DebugLoc DL = NextI->getDebugLoc();
+  unsigned Opc = IsReturnZero ? RISCV::CM_POPRETZ : RISCV::CM_POPRET;
+  // The new instruction reuses the first two operands of the POP.
+  BuildMI(*NextI->getParent(), NextI, DL, TII->get(Opc))
+      .add(MBBI->getOperand(0))
+      .add(MBBI->getOperand(1));
+
+  MBBI->eraseFromParent();
+  NextI->eraseFromParent();
+  return true;
+}
+
+// Search for last assignment to a0 and if possible use ret_val slot of POP to
+// store return value. Returns true, after deleting the copy, when the nearest
+// instruction touching a0 before the POP copies X0 into a0; false otherwise.
+bool RISCVPushPopOpt::adjustRetVal(MachineBasicBlock::iterator &MBBI) {
+  MachineBasicBlock::reverse_iterator RE = MBBI->getParent()->rend();
+  // Track which register units have been modified and used between the POP
+  // insn and the last assignment to register a0.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+  // Since POP instruction is in Epilogue no normal instructions will follow
+  // after it. Therefore search only previous ones to find the return value.
+  for (MachineBasicBlock::reverse_iterator I =
+           next_nodbg(MBBI.getReverse(), RE);
+       I != RE; I = next_nodbg(I, RE)) {
+    MachineInstr &MI = *I;
+    if (auto OperandPair = TII->isCopyInstrImpl(MI)) {
+      Register DestReg = OperandPair->Destination->getReg();
+      Register Source = OperandPair->Source->getReg();
+      if (DestReg == RISCV::X10 && Source == RISCV::X0) {
+        // Erase rather than merely unlink the copy: removeFromParent() would
+        // leave the detached instruction allocated and unowned.
+        MI.eraseFromParent();
+        return true;
+      }
+    }
+    // Update modified / used register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+    // If a0 was modified or used, there is no possibility
+    // of using ret_val slot of POP instruction.
+    if (!ModifiedRegUnits.available(RISCV::X10) ||
+        !UsedRegUnits.available(RISCV::X10))
+      return false;
+  }
+  return false;
+}
+
+// Run the optimization over every basic block of Fn. Returns true if any
+// POP/return pair was rewritten.
+bool RISCVPushPopOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  // If Zcmp extension is not supported, abort.
+  const RISCVSubtarget *Subtarget = &Fn.getSubtarget<RISCVSubtarget>();
+  if (!Subtarget->hasStdExtZcmp())
+    return false;
+
+  // If frame pointer elimination has been disabled, abort to avoid breaking the
+  // ABI.
+  if (Fn.getTarget().Options.DisableFramePointerElim(Fn))
+    return false;
+
+  TII = static_cast<const RISCVInstrInfo *>(Subtarget->getInstrInfo());
+  TRI = Subtarget->getRegisterInfo();
+  // Resize the modified and used register unit trackers.  We do this once
+  // per function and then clear the register units each time we determine
+  // correct return value for the POP.
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+  bool Modified = false;
+  for (auto &MBB : Fn) {
+    MachineBasicBlock::iterator MBBI = containsPop(MBB);
+    // No POP in this block; MBB.end() must not be advanced or dereferenced.
+    if (MBBI == MBB.end())
+      continue;
+    // next_nodbg yields MBB.end() when the POP is the last non-debug
+    // instruction, so check before reading the opcode.
+    MachineBasicBlock::iterator NextI = next_nodbg(MBBI, MBB.end());
+    if (NextI != MBB.end() && NextI->getOpcode() == RISCV::PseudoRET)
+      Modified |= usePopRet(MBBI, NextI, adjustRetVal(MBBI));
+  }
+  return Modified;
+}
+
+/// createRISCVPushPopOptimizationPass - returns an instance of the
+/// Push/Pop optimization pass.
+FunctionPass *llvm::createRISCVPushPopOptimizationPass() {
+  return new RISCVPushPopOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -86,6 +86,7 @@
   initializeRISCVInsertVSETVLIPass(*PR);
   initializeRISCVDAGToDAGISelPass(*PR);
   initializeRISCVInitUndefPass(*PR);
+  initializeRISCVPushPopOptPass(*PR);
 }
 
 static StringRef computeDataLayout(const Triple &TT) {
@@ -347,6 +348,10 @@
 }
 
 void RISCVPassConfig::addPreEmitPass2() {
+  // Schedule PushPop Optimization before expansion of Pseudo instruction,
+  // ensuring return instruction is detected correctly.
+ if (TM->getOptLevel() != CodeGenOpt::None) + addPass(createRISCVPushPopOptimizationPass()); addPass(createRISCVExpandPseudoPass()); // Schedule the expansion of AMOs at the last possible moment, avoiding the diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -176,6 +176,7 @@ ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Stack Frame Layout Analysis +; CHECK-NEXT: RISC-V Zcmp Push/Pop optimization pass ; CHECK-NEXT: RISC-V pseudo instruction expansion pass ; CHECK-NEXT: RISC-V atomic pseudo instruction expansion pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -338,8 +338,7 @@ ; RV32IZCMP-NEXT: sw a0, %lo(var+4)(a7) ; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: sw a0, %lo(var)(a7) -; RV32IZCMP-NEXT: cm.pop {ra, s0-s11}, 80 -; RV32IZCMP-NEXT: ret +; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 80 ; ; RV32IZCMP-WITH-FP-LABEL: callee: ; RV32IZCMP-WITH-FP: # %bb.0: @@ -758,8 +757,7 @@ ; RV64IZCMP-NEXT: sw a0, %lo(var+4)(a7) ; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: sw a0, %lo(var)(a7) -; RV64IZCMP-NEXT: cm.pop {ra, s0-s11}, 160 -; RV64IZCMP-NEXT: ret +; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 ; ; RV64IZCMP-WITH-FP-LABEL: callee: ; RV64IZCMP-WITH-FP: # %bb.0: @@ -1287,8 +1285,7 @@ ; RV32IZCMP-NEXT: lw a0, 88(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: sw a0, %lo(var)(s0) ; RV32IZCMP-NEXT: addi sp, sp, 32 -; RV32IZCMP-NEXT: cm.pop {ra, s0-s11}, 112 -; RV32IZCMP-NEXT: ret +; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 112 ; ; RV32IZCMP-WITH-FP-LABEL: caller: ; RV32IZCMP-WITH-FP: # %bb.0: @@ -1841,8 
+1838,7 @@ ; RV64IZCMP-NEXT: ld a0, 176(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: sw a0, %lo(var)(s0) ; RV64IZCMP-NEXT: addi sp, sp, 128 -; RV64IZCMP-NEXT: cm.pop {ra, s0-s11}, 160 -; RV64IZCMP-NEXT: ret +; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 ; ; RV64IZCMP-WITH-FP-LABEL: caller: ; RV64IZCMP-WITH-FP: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll --- a/llvm/test/CodeGen/RISCV/push-pop-popret.ll +++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll @@ -19,10 +19,8 @@ ; RV32IZCMP-NEXT: .cfi_offset ra, -4 ; RV32IZCMP-NEXT: addi a0, sp, 12 ; RV32IZCMP-NEXT: call test@plt -; RV32IZCMP-NEXT: li a0, 0 ; RV32IZCMP-NEXT: addi sp, sp, 464 -; RV32IZCMP-NEXT: cm.pop {ra}, 64 -; RV32IZCMP-NEXT: ret +; RV32IZCMP-NEXT: cm.popretz {ra}, 64 ; ; RV64IZCMP-LABEL: foo: ; RV64IZCMP: # %bb.0: @@ -32,10 +30,8 @@ ; RV64IZCMP-NEXT: .cfi_offset ra, -8 ; RV64IZCMP-NEXT: addi a0, sp, 8 ; RV64IZCMP-NEXT: call test@plt -; RV64IZCMP-NEXT: li a0, 0 ; RV64IZCMP-NEXT: addi sp, sp, 464 -; RV64IZCMP-NEXT: cm.pop {ra}, 64 -; RV64IZCMP-NEXT: ret +; RV64IZCMP-NEXT: cm.popretz {ra}, 64 ; ; RV32I-LABEL: foo: ; RV32I: # %bb.0: @@ -82,10 +78,8 @@ ; RV32IZCMP-NEXT: sub a0, sp, a0 ; RV32IZCMP-NEXT: mv sp, a0 ; RV32IZCMP-NEXT: call callee_void@plt -; RV32IZCMP-NEXT: li a0, 0 ; RV32IZCMP-NEXT: addi sp, s0, -16 -; RV32IZCMP-NEXT: cm.pop {ra, s0}, 16 -; RV32IZCMP-NEXT: ret +; RV32IZCMP-NEXT: cm.popretz {ra, s0}, 16 ; ; RV64IZCMP-LABEL: pushpopret0: ; RV64IZCMP: # %bb.0: # %entry @@ -102,10 +96,8 @@ ; RV64IZCMP-NEXT: sub a0, sp, a0 ; RV64IZCMP-NEXT: mv sp, a0 ; RV64IZCMP-NEXT: call callee_void@plt -; RV64IZCMP-NEXT: li a0, 0 ; RV64IZCMP-NEXT: addi sp, s0, -16 -; RV64IZCMP-NEXT: cm.pop {ra, s0}, 16 -; RV64IZCMP-NEXT: ret +; RV64IZCMP-NEXT: cm.popretz {ra, s0}, 16 ; ; RV32I-LABEL: pushpopret0: ; RV32I: # %bb.0: # %entry @@ -174,8 +166,7 @@ ; RV32IZCMP-NEXT: call callee_void@plt ; RV32IZCMP-NEXT: li a0, 1 ; RV32IZCMP-NEXT: addi sp, s0, -16 -; 
RV32IZCMP-NEXT: cm.pop {ra, s0}, 16 -; RV32IZCMP-NEXT: ret +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 ; ; RV64IZCMP-LABEL: pushpopret1: ; RV64IZCMP: # %bb.0: # %entry @@ -194,8 +185,7 @@ ; RV64IZCMP-NEXT: call callee_void@plt ; RV64IZCMP-NEXT: li a0, 1 ; RV64IZCMP-NEXT: addi sp, s0, -16 -; RV64IZCMP-NEXT: cm.pop {ra, s0}, 16 -; RV64IZCMP-NEXT: ret +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 ; ; RV32I-LABEL: pushpopret1: ; RV32I: # %bb.0: # %entry @@ -264,8 +254,7 @@ ; RV32IZCMP-NEXT: call callee_void@plt ; RV32IZCMP-NEXT: li a0, -1 ; RV32IZCMP-NEXT: addi sp, s0, -16 -; RV32IZCMP-NEXT: cm.pop {ra, s0}, 16 -; RV32IZCMP-NEXT: ret +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 ; ; RV64IZCMP-LABEL: pushpopretneg1: ; RV64IZCMP: # %bb.0: # %entry @@ -284,8 +273,7 @@ ; RV64IZCMP-NEXT: call callee_void@plt ; RV64IZCMP-NEXT: li a0, -1 ; RV64IZCMP-NEXT: addi sp, s0, -16 -; RV64IZCMP-NEXT: cm.pop {ra, s0}, 16 -; RV64IZCMP-NEXT: ret +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 ; ; RV32I-LABEL: pushpopretneg1: ; RV32I: # %bb.0: # %entry @@ -354,8 +342,7 @@ ; RV32IZCMP-NEXT: call callee_void@plt ; RV32IZCMP-NEXT: li a0, 2 ; RV32IZCMP-NEXT: addi sp, s0, -16 -; RV32IZCMP-NEXT: cm.pop {ra, s0}, 16 -; RV32IZCMP-NEXT: ret +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 ; ; RV64IZCMP-LABEL: pushpopret2: ; RV64IZCMP: # %bb.0: # %entry @@ -374,8 +361,7 @@ ; RV64IZCMP-NEXT: call callee_void@plt ; RV64IZCMP-NEXT: li a0, 2 ; RV64IZCMP-NEXT: addi sp, s0, -16 -; RV64IZCMP-NEXT: cm.pop {ra, s0}, 16 -; RV64IZCMP-NEXT: ret +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 ; ; RV32I-LABEL: pushpopret2: ; RV32I: # %bb.0: # %entry @@ -889,15 +875,13 @@ ; RV32IZCMP: # %bb.0: ; RV32IZCMP-NEXT: cm.push {ra}, -16 ; RV32IZCMP-NEXT: call foo_test_irq@plt -; RV32IZCMP-NEXT: cm.pop {ra}, 16 -; RV32IZCMP-NEXT: ret +; RV32IZCMP-NEXT: cm.popret {ra}, 16 ; ; RV64IZCMP-LABEL: foo_no_irq: ; RV64IZCMP: # %bb.0: ; RV64IZCMP-NEXT: cm.push {ra}, -16 ; RV64IZCMP-NEXT: call foo_test_irq@plt -; RV64IZCMP-NEXT: cm.pop {ra}, 16 -; 
RV64IZCMP-NEXT: ret +; RV64IZCMP-NEXT: cm.popret {ra}, 16 ; ; RV32I-LABEL: foo_no_irq: ; RV32I: # %bb.0: @@ -1519,8 +1503,7 @@ ; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7) ; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7) -; RV32IZCMP-NEXT: cm.pop {ra, s0-s11}, 80 -; RV32IZCMP-NEXT: ret +; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 80 ; ; RV64IZCMP-LABEL: callee_no_irq: ; RV64IZCMP: # %bb.0: @@ -1603,8 +1586,7 @@ ; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7) ; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7) -; RV64IZCMP-NEXT: cm.pop {ra, s0-s11}, 160 -; RV64IZCMP-NEXT: ret +; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 ; ; RV32I-LABEL: callee_no_irq: ; RV32I: # %bb.0: