diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -36,6 +36,7 @@
   RISCVMergeBaseOffset.cpp
   RISCVOptWInstrs.cpp
   RISCVRedundantCopyElimination.cpp
+  RISCVMoveOptimizer.cpp
   RISCVRegisterInfo.cpp
   RISCVRVVInitUndef.cpp
   RISCVSubtarget.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -72,6 +72,9 @@
 void initializeRISCVInitUndefPass(PassRegistry &);
 extern char &RISCVInitUndefID;
 
+FunctionPass *createRISCVMoveOptimizationPass();
+void initializeRISCVMoveOptPass(PassRegistry &);
+
 InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
                                                     RISCVSubtarget &,
                                                     RISCVRegisterBankInfo &);
diff --git a/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp
@@ -0,0 +1,246 @@
+//===---------- RISCVMoveOptimizer.cpp - RISCV move opt. pass -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs move-related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define RISCV_MOVE_OPT_NAME "RISC-V Zcmp move merging pass"
+
+namespace {
+struct RISCVMoveOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVMoveOpt() : MachineFunctionPass(ID) {
+    initializeRISCVMoveOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  const RISCVInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const RISCVSubtarget *Subtarget;
+
+  // Track which register units have been modified and used.
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+  bool isCandidateToMergeMVA01S(const DestSourcePair &RegPair);
+  bool isCandidateToMergeMVSA01(const DestSourcePair &RegPair);
+  // Merge the two copy instructions into a single paired instruction.
+  MachineBasicBlock::iterator
+  mergePairedInsns(MachineBasicBlock::iterator I,
+                   MachineBasicBlock::iterator Paired, unsigned Opcode);
+
+  // Look for a C.MV instruction that can be combined with the given
+  // instruction into CM.MVA01S or CM.MVSA01. Return the matching
+  // instruction if one exists.
+  MachineBasicBlock::iterator
+  findMatchingInst(MachineBasicBlock::iterator &MBBI, unsigned InstOpcode,
+                   const DestSourcePair &RegPair);
+  bool MoveOpt(MachineBasicBlock &MBB);
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  StringRef getPassName() const override { return RISCV_MOVE_OPT_NAME; }
+};
+
+char RISCVMoveOpt::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVMoveOpt, "riscv-move-opt", RISCV_MOVE_OPT_NAME, false,
+                false)
+
+// Check if registers meet CM.MVA01S constraints.
+bool RISCVMoveOpt::isCandidateToMergeMVA01S(const DestSourcePair &RegPair) {
+  Register Destination = RegPair.Destination->getReg();
+  Register Source = RegPair.Source->getReg();
+  // The destination must be a0 or a1, and the source must be an s0-s7
+  // register (the SR07 class).
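+  // e.g. "mv a0, s1" is a candidate, while "mv a2, s1" (destination is not
+  // a0/a1) and "mv a0, t0" (source is outside s0-s7) are not.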
+  if ((Destination == RISCV::X10 || Destination == RISCV::X11) &&
+      RISCV::SR07RegClass.contains(Source))
+    return true;
+  return false;
+}
+
+// Check if registers meet CM.MVSA01 constraints.
+bool RISCVMoveOpt::isCandidateToMergeMVSA01(const DestSourcePair &RegPair) {
+  Register Destination = RegPair.Destination->getReg();
+  Register Source = RegPair.Source->getReg();
+  // The source must be a0 or a1, and the destination must be an s0-s7
+  // register (the SR07 class).
+  if ((Source == RISCV::X10 || Source == RISCV::X11) &&
+      RISCV::SR07RegClass.contains(Destination))
+    return true;
+  return false;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+                               MachineBasicBlock::iterator Paired,
+                               unsigned Opcode) {
+  const MachineOperand *Sreg1, *Sreg2;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+  DestSourcePair FirstPair = TII->isCopyInstrImpl(*I).value();
+  DestSourcePair PairedRegs = TII->isCopyInstrImpl(*Paired).value();
+  Register ARegInFirstPair = Opcode == RISCV::CM_MVA01S
+                                 ? FirstPair.Destination->getReg()
+                                 : FirstPair.Source->getReg();
+
+  if (NextI == Paired)
+    NextI = next_nodbg(NextI, E);
+  DebugLoc DL = I->getDebugLoc();
+
+  // The order of the S-registers depends on which instruction holds A0,
+  // not on the order of the register pair.
+  // e.g.
+  //   mv a1, s1
+  //   mv a0, s2    =>    cm.mva01s s2, s1
+  //
+  //   mv a0, s2
+  //   mv a1, s1    =>    cm.mva01s s2, s1
+  if (Opcode == RISCV::CM_MVA01S) {
+    Sreg1 =
+        ARegInFirstPair == RISCV::X10 ? FirstPair.Source : PairedRegs.Source;
+    Sreg2 =
+        ARegInFirstPair == RISCV::X10 ? PairedRegs.Source : FirstPair.Source;
+  } else {
+    Sreg1 = ARegInFirstPair == RISCV::X10 ? FirstPair.Destination
+                                          : PairedRegs.Destination;
+    Sreg2 = ARegInFirstPair == RISCV::X10 ? PairedRegs.Destination
+                                          : FirstPair.Destination;
+  }
+
+  BuildMI(*I->getParent(), I, DL, TII->get(Opcode)).add(*Sreg1).add(*Sreg2);
+
+  I->eraseFromParent();
+  Paired->eraseFromParent();
+  return NextI;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveOpt::findMatchingInst(MachineBasicBlock::iterator &MBBI,
+                               unsigned InstOpcode,
+                               const DestSourcePair &RegPair) {
+  MachineBasicBlock::iterator E = MBBI->getParent()->end();
+
+  // Track which register units have been modified and used between the first
+  // insn and the second insn.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+
+  for (MachineBasicBlock::iterator I = next_nodbg(MBBI, E); I != E;
+       I = next_nodbg(I, E)) {
+
+    MachineInstr &MI = *I;
+
+    if (auto SecondPair = TII->isCopyInstrImpl(MI)) {
+      Register SourceReg = SecondPair->Source->getReg();
+      Register DestReg = SecondPair->Destination->getReg();
+
+      if (InstOpcode == RISCV::CM_MVA01S &&
+          isCandidateToMergeMVA01S(*SecondPair)) {
+        // A valid pair must write both a0 and a1; bail out if the second
+        // copy writes the same destination as the first.
+        if (RegPair.Destination->getReg() == DestReg)
+          return E;
+
+        // If the paired destination register was modified or used, there is
+        // no possibility of finding a matching instruction, so exit early.
+        if (!ModifiedRegUnits.available(DestReg))
+          return E;
+
+        // We need to check if the source register in the second paired
+        // instruction is defined in between.
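+        // If it is, merging at the first copy's position would read the
+        // source's old value and copy the wrong one.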
+        if (ModifiedRegUnits.available(SourceReg))
+          return I;
+
+      } else if (InstOpcode == RISCV::CM_MVSA01 &&
+                 isCandidateToMergeMVSA01(*SecondPair)) {
+        // The second copy must read the other A-register and write a
+        // different S-register.
+        if ((RegPair.Source->getReg() == SourceReg) ||
+            (RegPair.Destination->getReg() == DestReg))
+          return E;
+
+        if (!ModifiedRegUnits.available(SourceReg))
+          return E;
+
+        // As for mvsa01, we need to make sure the dest register of the second
+        // paired instruction is not used in between, since we would move its
+        // definition ahead.
+        if (UsedRegUnits.available(DestReg))
+          return I;
+      }
+    }
+    // Update modified / used register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+  }
+  return E;
+}
+
+// Find instructions that could be represented as C.MV instructions and merge
+// them into CM.MVA01S or CM.MVSA01.
+bool RISCVMoveOpt::MoveOpt(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+       MBBI != E;) {
+    // Check if the instruction can be compressed to a C.MV instruction. If
+    // it can, return the Dest/Src register pair.
+    auto RegPair = TII->isCopyInstrImpl(*MBBI);
+    if (RegPair.has_value()) {
+      unsigned Opcode = 0;
+
+      if (isCandidateToMergeMVA01S(*RegPair))
+        Opcode = RISCV::CM_MVA01S;
+      else if (isCandidateToMergeMVSA01(*RegPair))
+        Opcode = RISCV::CM_MVSA01;
+      else {
+        ++MBBI;
+        continue;
+      }
+
+      MachineBasicBlock::iterator Paired =
+          findMatchingInst(MBBI, Opcode, RegPair.value());
+      // If a matching instruction was found, merge the pair.
+      if (Paired != E) {
+        MBBI = mergePairedInsns(MBBI, Paired, Opcode);
+        Modified = true;
+        continue;
+      }
+    }
+    ++MBBI;
+  }
+  return Modified;
+}
+
+bool RISCVMoveOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  Subtarget = &Fn.getSubtarget<RISCVSubtarget>();
+  if (!Subtarget->hasStdExtZcmp())
+    return false;
+
+  TII = static_cast<const RISCVInstrInfo *>(Subtarget->getInstrInfo());
+  TRI = Subtarget->getRegisterInfo();
+  // Resize the modified and used register unit trackers. We do this once
+  // per function and then clear the register units each time we optimize a
+  // move.
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+  bool Modified = false;
+  for (auto &MBB : Fn)
+    Modified |= MoveOpt(MBB);
+  return Modified;
+}
+
+/// createRISCVMoveOptimizationPass - returns an instance of the
+/// move optimization pass.
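+/// The pass is scheduled in addPreEmitPass2, after register allocation, so
+/// it matches the physical a0/a1 and s0-s7 registers directly.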
+FunctionPass *llvm::createRISCVMoveOptimizationPass() {
+  return new RISCVMoveOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -86,6 +86,7 @@
   initializeRISCVInsertVSETVLIPass(*PR);
   initializeRISCVDAGToDAGISelPass(*PR);
   initializeRISCVInitUndefPass(*PR);
+  initializeRISCVMoveOptPass(*PR);
 }
 
 static StringRef computeDataLayout(const Triple &TT) {
@@ -347,6 +348,8 @@
 }
 
 void RISCVPassConfig::addPreEmitPass2() {
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createRISCVMoveOptimizationPass());
   addPass(createRISCVExpandPseudoPass());
 
   // Schedule the expansion of AMOs at the last possible moment, avoiding the
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -176,6 +176,7 @@
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       Stack Frame Layout Analysis
+; CHECK-NEXT:       RISC-V Zcmp move merging pass
 ; CHECK-NEXT:       RISC-V pseudo instruction expansion pass
 ; CHECK-NEXT:       RISC-V atomic pseudo instruction expansion pass
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll b/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll
@@ -0,0 +1,197 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=CHECK32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=CHECK32ZCMP %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=CHECK64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=CHECK64ZCMP %s
+
+declare i32 @foo(i32)
+declare i32 @func(i32,i32)
+
+define i32 @zcmp_mv(i32 %num, i32 %f) nounwind {
+; CHECK32I-LABEL: zcmp_mv:
+; CHECK32I:       # %bb.0:
+; CHECK32I-NEXT:    addi sp, sp, -16
+; CHECK32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; CHECK32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; CHECK32I-NEXT:    mv s0, a1
+; CHECK32I-NEXT:    mv s1, a0
+; CHECK32I-NEXT:    call func@plt
+; CHECK32I-NEXT:    mv s2, a0
+; CHECK32I-NEXT:    mv a0, s1
+; CHECK32I-NEXT:    mv a1, s0
+; CHECK32I-NEXT:    call func@plt
+; CHECK32I-NEXT:    add a0, s2, s0
+; CHECK32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; CHECK32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; CHECK32I-NEXT:    addi sp, sp, 16
+; CHECK32I-NEXT:    ret
+;
+; CHECK32ZCMP-LABEL: zcmp_mv:
+; CHECK32ZCMP:       # %bb.0:
+; CHECK32ZCMP-NEXT:    addi sp, sp, -16
+; CHECK32ZCMP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32ZCMP-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK32ZCMP-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; CHECK32ZCMP-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; CHECK32ZCMP-NEXT:    cm.mvsa01 s1, s0
+; CHECK32ZCMP-NEXT:    call func@plt
+; CHECK32ZCMP-NEXT:    mv s2, a0
+; CHECK32ZCMP-NEXT:    cm.mva01s s1, s0
+; CHECK32ZCMP-NEXT:    call func@plt
+; CHECK32ZCMP-NEXT:    add a0, s2, s0
+; CHECK32ZCMP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32ZCMP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK32ZCMP-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; CHECK32ZCMP-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; CHECK32ZCMP-NEXT:    addi sp, sp, 16
+; CHECK32ZCMP-NEXT:    ret
+;
+; CHECK64I-LABEL: zcmp_mv:
+; CHECK64I:       # %bb.0:
+; CHECK64I-NEXT:    addi sp, sp, -32
+; CHECK64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK64I-NEXT:    mv s0, a1
+; CHECK64I-NEXT:    mv s1, a0
+; CHECK64I-NEXT:    call func@plt
+; CHECK64I-NEXT:    mv s2, a0
+; CHECK64I-NEXT:    mv a0, s1
+; CHECK64I-NEXT:    mv a1, s0
+; CHECK64I-NEXT:    call func@plt
+; CHECK64I-NEXT:    addw a0, s2, s0
+; CHECK64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK64I-NEXT:    addi sp, sp, 32
+; CHECK64I-NEXT:    ret
+;
+; CHECK64ZCMP-LABEL: zcmp_mv:
+; CHECK64ZCMP:       # %bb.0:
+; CHECK64ZCMP-NEXT:    addi sp, sp, -32
+; CHECK64ZCMP-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK64ZCMP-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK64ZCMP-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK64ZCMP-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK64ZCMP-NEXT:    cm.mvsa01 s1, s0
+; CHECK64ZCMP-NEXT:    call func@plt
+; CHECK64ZCMP-NEXT:    mv s2, a0
+; CHECK64ZCMP-NEXT:    cm.mva01s s1, s0
+; CHECK64ZCMP-NEXT:    call func@plt
+; CHECK64ZCMP-NEXT:    addw a0, s2, s0
+; CHECK64ZCMP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK64ZCMP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK64ZCMP-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK64ZCMP-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK64ZCMP-NEXT:    addi sp, sp, 32
+; CHECK64ZCMP-NEXT:    ret
+  %call = call i32 @func(i32 %num, i32 %f)
+  %call1 = call i32 @func(i32 %num, i32 %f)
+  %res = add i32 %call, %f
+  ret i32 %res
+}
+
+define i32 @not_zcmp_mv(i32 %num, i32 %f) nounwind {
+; CHECK32I-LABEL: not_zcmp_mv:
+; CHECK32I:       # %bb.0:
+; CHECK32I-NEXT:    addi sp, sp, -16
+; CHECK32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; CHECK32I-NEXT:    mv s0, a1
+; CHECK32I-NEXT:    call foo@plt
+; CHECK32I-NEXT:    mv s1, a0
+; CHECK32I-NEXT:    mv a0, s0
+; CHECK32I-NEXT:    call foo@plt
+; CHECK32I-NEXT:    mv a0, s1
+; CHECK32I-NEXT:    call foo@plt
+; CHECK32I-NEXT:    li a0, 1
+; CHECK32I-NEXT:    mv a1, s0
+; CHECK32I-NEXT:    call func@plt
+; CHECK32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; CHECK32I-NEXT:    addi sp, sp, 16
+; CHECK32I-NEXT:    ret
+;
+; CHECK32ZCMP-LABEL: not_zcmp_mv:
+; CHECK32ZCMP:       # %bb.0:
+; CHECK32ZCMP-NEXT:    addi sp, sp, -16
+; CHECK32ZCMP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32ZCMP-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK32ZCMP-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; CHECK32ZCMP-NEXT:    mv s0, a1
+; CHECK32ZCMP-NEXT:    call foo@plt
+; CHECK32ZCMP-NEXT:    mv s1, a0
+; CHECK32ZCMP-NEXT:    mv a0, s0
+; CHECK32ZCMP-NEXT:    call foo@plt
+; CHECK32ZCMP-NEXT:    mv a0, s1
+; CHECK32ZCMP-NEXT:    call foo@plt
+; CHECK32ZCMP-NEXT:    li a0, 1
+; CHECK32ZCMP-NEXT:    mv a1, s0
+; CHECK32ZCMP-NEXT:    call func@plt
+; CHECK32ZCMP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32ZCMP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK32ZCMP-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; CHECK32ZCMP-NEXT:    addi sp, sp, 16
+; CHECK32ZCMP-NEXT:    ret
+;
+; CHECK64I-LABEL: not_zcmp_mv:
+; CHECK64I:       # %bb.0:
+; CHECK64I-NEXT:    addi sp, sp, -32
+; CHECK64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK64I-NEXT:    mv s0, a1
+; CHECK64I-NEXT:    call foo@plt
+; CHECK64I-NEXT:    mv s1, a0
+; CHECK64I-NEXT:    mv a0, s0
+; CHECK64I-NEXT:    call foo@plt
+; CHECK64I-NEXT:    mv a0, s1
+; CHECK64I-NEXT:    call foo@plt
+; CHECK64I-NEXT:    li a0, 1
+; CHECK64I-NEXT:    mv a1, s0
+; CHECK64I-NEXT:    call func@plt
+; CHECK64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK64I-NEXT:    addi sp, sp, 32
+; CHECK64I-NEXT:    ret
+;
+; CHECK64ZCMP-LABEL: not_zcmp_mv:
+; CHECK64ZCMP:       # %bb.0:
+; CHECK64ZCMP-NEXT:    addi sp, sp, -32
+; CHECK64ZCMP-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK64ZCMP-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK64ZCMP-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK64ZCMP-NEXT:    mv s0, a1
+; CHECK64ZCMP-NEXT:    call foo@plt
+; CHECK64ZCMP-NEXT:    mv s1, a0
+; CHECK64ZCMP-NEXT:    mv a0, s0
+; CHECK64ZCMP-NEXT:    call foo@plt
+; CHECK64ZCMP-NEXT:    mv a0, s1
+; CHECK64ZCMP-NEXT:    call foo@plt
+; CHECK64ZCMP-NEXT:    li a0, 1
+; CHECK64ZCMP-NEXT:    mv a1, s0
+; CHECK64ZCMP-NEXT:    call func@plt
+; CHECK64ZCMP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK64ZCMP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK64ZCMP-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK64ZCMP-NEXT:    addi sp, sp, 32
+; CHECK64ZCMP-NEXT:    ret
+  %call = call i32 @foo(i32 %num)
+  %call1 = call i32 @foo(i32 %f)
+  %tmp = call i32 @foo(i32 %call)
+  %res = call i32 @func(i32 1, i32 %f)
+  ret i32 %res
+}