diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -302,6 +302,7 @@ return Features; // Keep "c" feature if there is one in PlatformFlags. } + bool hasZca = false; Optional<StringRef> Attr = Attributes.getAttributeString(RISCVAttrs::ARCH); if (Attr) { // The Arch pattern is [rv32|rv64][i|e]version(_[m|a|f|d|c]version)* @@ -313,6 +314,7 @@ else if (Arch.consume_front("rv64")) Features.AddFeature("64bit"); + hasZca = Arch.contains("zca"); while (!Arch.empty()) { switch (Arch[0]) { default: @@ -338,6 +340,12 @@ } } + if (hasZca) { + Features.AddFeature("c", false); + Features.AddFeature("experimental-zca"); + } + return Features; } diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -109,6 +109,9 @@ {"zbr", RISCVExtensionVersion{0, 93}}, {"zbt", RISCVExtensionVersion{0, 93}}, {"zvfh", RISCVExtensionVersion{0, 1}}, + {"zca", RISCVExtensionVersion{0, 70}}, + {"zcb", RISCVExtensionVersion{0, 70}}, + {"zcmp", RISCVExtensionVersion{0, 70}}, }; static bool stripExperimentalPrefix(StringRef &Ext) { diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -172,6 +172,9 @@ OperandMatchResultTy parseMaskReg(OperandVector &Operands); OperandMatchResultTy parseInsnDirectiveOpcode(OperandVector &Operands); OperandMatchResultTy parseGPRAsFPR(OperandVector &Operands); + OperandMatchResultTy parseReglist(OperandVector &Operands); + OperandMatchResultTy parseRetval(OperandVector &Operands); + OperandMatchResultTy parseZceSpimm(OperandVector &Operands); bool parseOperand(OperandVector &Operands, StringRef Mnemonic); @@ -276,6 +279,8 @@ Immediate, SystemRegister, VType, + Rlist, + Spimm, } Kind; bool IsRV64; @@ -302,6 +307,15 @@ unsigned Val; }; + struct RlistOp { + bool isCInst; + unsigned Val; + }; + + struct SpimmOp { + unsigned Val; + }; + SMLoc StartLoc, EndLoc; union { StringRef Tok; @@ -309,6 +323,8 @@ ImmOp Imm; struct SysRegOp SysReg; struct VTypeOp VType; + struct RlistOp Rlist; + struct SpimmOp Spimm; }; RISCVOperand(KindTy K) : Kind(K) {} @@ -335,6 +351,12 @@ case KindTy::VType: VType = o.VType; break; + case KindTy::Rlist: + Rlist = o.Rlist; + break; + case KindTy::Spimm: + Spimm = o.Spimm; + break; } } @@ -346,6 +368,8 @@ bool isImm() const override { return Kind == KindTy::Immediate; } bool isMem() const override { return false; } bool isSystemRegister() const { return Kind == KindTy::SystemRegister; } + bool isRlist() const { return Kind == KindTy::Rlist; } + bool isSpimm() const { return Kind == KindTy::Spimm; } bool isGPR() const { return Kind == KindTy::Register && @@ -618,6 +642,15 @@ VK == RISCVMCExpr::VK_RISCV_None; } + bool isUImm2Lsb0() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; + if (!isImm()) + return false; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + return IsConstantImm && isShiftedUInt<1, 1>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; + } + bool isUImm7Lsb00() const { if (!isImm()) return false; @@ -830,6 +863,16 @@ RISCVVType::printVType(getVType(), OS); OS << '>'; break; + case KindTy::Rlist: + OS << "{Rlist: "; + RISCVZCE::printRlist(Rlist.Val, OS); + OS << '}'; + break; + case KindTy::Spimm: + OS << "{Spimm: "; + RISCVZCE::printSpimm(Spimm.Val, OS); + OS << '}'; + break; } } @@ -887,6 +930,25
@@ return Op; } + static std::unique_ptr<RISCVOperand> + createRlist(unsigned RlistEncode, SMLoc S, bool IsRV64, bool isCInst) { + auto Op = std::make_unique<RISCVOperand>(KindTy::Rlist); + Op->Rlist.Val = RlistEncode; + Op->Rlist.isCInst = isCInst; + Op->StartLoc = S; + Op->IsRV64 = IsRV64; + return Op; + } + + static std::unique_ptr<RISCVOperand> createSpimm(unsigned spimm, SMLoc S, + bool IsRV64) { + auto Op = std::make_unique<RISCVOperand>(KindTy::Spimm); + Op->Spimm.Val = spimm; + Op->StartLoc = S; + Op->IsRV64 = IsRV64; + return Op; + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { assert(Expr && "Expr shouldn't be null!"); int64_t Imm = 0; @@ -970,6 +1032,16 @@ Inst.addOperand(MCOperand::createImm(Imm)); } + void addRlistOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(Rlist.Val)); + } + + void addSpimmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(Spimm.Val)); + } + // Returns the rounding mode represented by this RISCVOperand. Should only // be called after checking isFRMArg. RISCVFPRndMode::RoundingMode getRoundingMode() const { @@ -1151,6 +1223,10 @@ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 4) - 1); case Match_InvalidUImm2: return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 2) - 1); + case Match_InvalidUImm2Lsb0: + return generateImmOutOfRangeError( + Operands, ErrorInfo, 0, 2, + "immediate must be one of"); case Match_InvalidUImm3: return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 3) - 1); case Match_InvalidUImm5: @@ -1279,6 +1355,18 @@ (1 << 4), "immediate must be in the range"); } + case Match_InvalidRlist: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, "operand must be {ra [, s0[-sN]]} or " + "{x1 [, x8[-x9][, x18[-xN]]]}"); + } + case Match_InvalidSpimm: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error( + ErrorLoc, + "stack adjustment is invalid for this instruction and register list; " + "refer to the Zce spec for the valid stack adjustment range"); + } case Match_InvalidRnumArg: { return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10); } @@ -1922,6 +2010,115 @@ return MatchOperand_Success; } +OperandMatchResultTy RISCVAsmParser::parseReglist(OperandVector &Operands) { + // Rlist: {ra [, s0[-sN]]} (ABI register names) + // XRlist: {x1 [, x8[-x9][, x18[-xN]]]} (architectural register names) + SMLoc S = getLoc(); + if (getLexer().isNot(AsmToken::LCurly)) + return MatchOperand_NoMatch; + getLexer().Lex(); // eat '{' + bool IsEmptyList = getLexer().is(AsmToken::RCurly); + + StringRef Mnemonic = + static_cast<RISCVOperand *>(Operands.front().get())->getToken(); + bool Is16Bit = Mnemonic.startswith("cm."); + bool IsEABI = isRV32E() || Mnemonic.endswith(".e"); + bool IsXReglist = false; + + MCRegister RegStart = RISCV::NoRegister; + MCRegister RegEnd = RISCV::NoRegister; + if (!IsEmptyList) { + StringRef RegName = getLexer().getTok().getIdentifier(); + IsXReglist = RegName.startswith("x"); + matchRegisterNameHelper(IsEABI, RegStart, RegName); + if (RegStart != RISCV::X1) + return MatchOperand_NoMatch; + getLexer().Lex(); + } + + // parse case like ', s0' + if (getLexer().is(AsmToken::Comma)) { + getLexer().Lex(); + if (getLexer().isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + StringRef RegName = getLexer().getTok().getIdentifier(); + if (matchRegisterNameHelper(/*IsEABI*/ false, RegStart, RegName)) + return MatchOperand_NoMatch; + if (RegStart != RISCV::X8) + return MatchOperand_NoMatch;
+ getLexer().Lex(); // eat reg + } + + // parse case like '-s1' + if (getLexer().is(AsmToken::Minus)) { + getLexer().Lex(); + StringRef EndName = getLexer().getTok().getIdentifier(); + // FIXME: the register mapping and checks for EABI are wrong. + if (matchRegisterNameHelper(/*IsEABI*/ false, RegEnd, EndName)) + return MatchOperand_NoMatch; + getLexer().Lex(); + } + + // parse extra part like ', x18[-x20]' for XRegList + if (IsXReglist && getLexer().is(AsmToken::Comma)) { + if (RegEnd != RISCV::X9) + return MatchOperand_NoMatch; + + // parse ', x18' for extra part + getLexer().Lex(); + if (getLexer().isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + StringRef EndName = getLexer().getTok().getIdentifier(); + if (MatchRegisterName(EndName) != RISCV::X18) + return MatchOperand_NoMatch; + getLexer().Lex(); + + // parse '-x20' for extra part + if (getLexer().is(AsmToken::Minus)) { + getLexer().Lex(); + if (getLexer().isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + EndName = getLexer().getTok().getIdentifier(); + if (MatchRegisterName(EndName) == RISCV::NoRegister) + return MatchOperand_NoMatch; + getLexer().Lex(); + } + RegEnd = MatchRegisterName(EndName); + } + + if (getLexer().isNot(AsmToken::RCurly)) + return MatchOperand_NoMatch; + getLexer().Lex(); // eat '}' + + if (RegEnd == RISCV::NoRegister) + RegEnd = RegStart; + + auto Encode = RISCVZCE::encodeRlist(RegEnd, Is16Bit, IsEABI); + if (Encode == 16) { + Error(getLoc(), "invalid register list; {ra, s0-s10} is not supported"); + return MatchOperand_ParseFail; + } + Operands.push_back(RISCVOperand::createRlist(Encode, S, isRV64(), Is16Bit)); + + return MatchOperand_Success; +} + +OperandMatchResultTy RISCVAsmParser::parseZceSpimm(OperandVector &Operands) { + if (getLexer().is(AsmToken::Minus)) + getLexer().Lex(); + + SMLoc S = getLoc(); + + StringRef Mnemonic = + static_cast<RISCVOperand *>(Operands.front().get())->getToken(); + int64_t StackAdjustment = getLexer().getTok().getIntVal(); + unsigned Spimm = 0; + unsigned RlistVal = static_cast<RISCVOperand *>(Operands[1].get())->Rlist.Val; + + bool IsEABI = isRV32E() || Mnemonic.endswith(".e"); + if (!RISCVZCE::getSpimm(RlistVal, Spimm, StackAdjustment, isRV64(), IsEABI)) + return MatchOperand_NoMatch; + Operands.push_back(RISCVOperand::createSpimm(Spimm << 4, S, isRV64())); + getLexer().Lex(); + return MatchOperand_Success; +}
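For reference, a minimal sketch (not part of the patch; the helper name is made up) of the register-list-to-encoding mapping that parseReglist is expected to produce:

// Illustrative only: maps the last saved s-register to the 4-bit rlist
// encoding used by cm.push/cm.pop (LastS = -1 means {ra} alone).
static int rlistForLastSReg(int LastS) {
  if (LastS < 0)
    return 4;           // {ra}
  if (LastS <= 9)
    return 5 + LastS;   // {ra, s0-sN} for N <= 9
  if (LastS == 11)
    return 15;          // {ra, s0-s11}
  return -1;            // {ra, s0-s10} is not encodable: s10 requires s11
}
// e.g. "cm.push {ra, s0-s2}, -32" parses to rlist encoding 7.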
/// Looks at a token type and creates the relevant operand from this /// information, adding to Operands. If operand was parsed, returns false, else true. @@ -2794,7 +2991,16 @@ .addImm(Imm - 1) .addOperand(Inst.getOperand(3))); } - + return false; + } + case RISCV::CM_POPRET: + case RISCV::CM_POPRETZ: + case RISCV::CM_POP: + case RISCV::CM_PUSH: { + unsigned Opc = Inst.getOpcode(); + emitToStreamer(Out, MCInstBuilder(Opc) + .addOperand(Inst.getOperand(0)) + .addOperand(Inst.getOperand(1))); return false; } } diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -37,6 +37,7 @@ RISCVMCInstLower.cpp RISCVMergeBaseOffset.cpp + RISCVMoveOptimizer.cpp RISCVRedundantCopyElimination.cpp RISCVRegisterBankInfo.cpp RISCVRegisterInfo.cpp RISCVSExtWRemoval.cpp @@ -44,6 +45,7 @@ RISCVTargetMachine.cpp RISCVTargetObjectFile.cpp RISCVTargetTransformInfo.cpp + RISCVOptimizePushPop.cpp LINK_COMPONENTS Analysis diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -170,6 +170,17 @@ return MCDisassembler::Success; } +static DecodeStatus DecodeSR07RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 8) + return MCDisassembler::Fail; + + MCRegister Reg = (RegNo < 2) ? (RegNo + RISCV::X8) : (RegNo - 2 + RISCV::X18); + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { @@ -369,6 +380,13 @@ uint64_t Address, const MCDisassembler *Decoder); +static DecodeStatus decodeZceRlist(MCInst &Inst, unsigned Imm, uint64_t Address, + const MCDisassembler *Decoder); + +static DecodeStatus decodeZceSpimm(MCInst &Inst, unsigned Imm, + uint64_t Address, + const MCDisassembler *Decoder); + #include "RISCVGenDisassemblerTables.inc" static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn, @@ -428,6 +446,24 @@ return MCDisassembler::Success; } +static DecodeStatus decodeZceRlist(MCInst &Inst, unsigned Imm, uint64_t Address, + const MCDisassembler *Decoder) { + // Rlist encodings 0-3 are reserved for EABI. + if (Imm <= 3) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createImm(Imm)); + return MCDisassembler::Success; +} + +// spimm is based on rlist now. +static DecodeStatus decodeZceSpimm(MCInst &Inst, unsigned Imm, + uint64_t Address, + const MCDisassembler *Decoder) { + // TODO: check if spimm matches rlist + Inst.addOperand(MCOperand::createImm(Imm)); + return MCDisassembler::Success; +} + DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -355,7 +355,8 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const { bool HasStdExtC = STI->getFeatureBits()[RISCV::FeatureStdExtC]; - unsigned MinNopLen = HasStdExtC ? 2 : 4; + bool HasStdExtZca = STI->getFeatureBits()[RISCV::FeatureExtZca]; + unsigned MinNopLen = (HasStdExtC || HasStdExtZca) ? 2 : 4; if ((Count % MinNopLen) != 0) return false; @@ -365,7 +366,7 @@ OS.write("\x13\0\0\0", 4); // The canonical nop on RVC is c.nop.
- if (Count && HasStdExtC) + if (Count && (HasStdExtC || HasStdExtZca)) OS.write("\x01\0", 2); return true; @@ -588,7 +589,8 @@ return false; bool HasStdExtC = STI->getFeatureBits()[RISCV::FeatureStdExtC]; - unsigned MinNopLen = HasStdExtC ? 2 : 4; + bool HasStdExtZca = STI->getFeatureBits()[RISCV::FeatureExtZca]; + unsigned MinNopLen = (HasStdExtC || HasStdExtZca) ? 2 : 4; if (AF.getAlignment() <= MinNopLen) { return false; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -19,6 +19,7 @@ #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegister.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/RISCVISAInfo.h" namespace llvm { @@ -433,6 +434,130 @@ } // namespace RISCVVType +namespace RISCVZCE { + +enum class RLISTENCODE { + RA = 4, + RA_S0, + RA_S0_S1, + RA_S0_S2, + RA_S0_S3, + RA_S0_S4, + RA_S0_S5, + RA_S0_S6, + RA_S0_S7, + RA_S0_S8, + RA_S0_S9, + // Note: to include s10, s11 must also be included. + RA_S0_S11, + RA_S0_S10, // This is for error checking. +}; + +inline unsigned encodeRlist(MCRegister EndReg, bool isCInst, + bool IsRV32E = false) { + auto RlistEncode = [=] { + switch (EndReg) { + case RISCV::X1: + return RLISTENCODE::RA; + case RISCV::X8: + return RLISTENCODE::RA_S0; + case RISCV::X9: + return RLISTENCODE::RA_S0_S1; + case RISCV::X18: + return RLISTENCODE::RA_S0_S2; + case RISCV::X19: + return RLISTENCODE::RA_S0_S3; + case RISCV::X20: + return RLISTENCODE::RA_S0_S4; + case RISCV::X21: + return RLISTENCODE::RA_S0_S5; + case RISCV::X22: + return RLISTENCODE::RA_S0_S6; + case RISCV::X23: + return RLISTENCODE::RA_S0_S7; + case RISCV::X24: + return RLISTENCODE::RA_S0_S8; + case RISCV::X25: + return RLISTENCODE::RA_S0_S9; + case RISCV::X26: + return RLISTENCODE::RA_S0_S10; + case RISCV::X27: + return RLISTENCODE::RA_S0_S11; + default: + llvm_unreachable("Undefined input."); + } + }(); + return static_cast<unsigned>(RlistEncode); +} + +inline static unsigned getStackAdjBase(unsigned rlistVal, bool isRV64, + bool isEABI) { + assert(rlistVal != 16 && + "{ra, s0-s10} is not supported; s11 must be included."); + if (isEABI) { + return 16; + } + if (!isRV64) { + switch (rlistVal) { + case /*ra*/ 4: + case /*s0*/ 5: + case /*s1*/ 6: + case /*s2*/ 7: + return 16; + case /*s3*/ 8: + case /*s4*/ 9: + case /*s5*/ 10: + case /*s6*/ 11: + return 32; + case /*s7*/ 12: + case /*s8*/ 13: + case /*s9*/ 14: + return 48; + case /*s11*/ 15: + return 64; + } + } else { + switch (rlistVal) { + case /*ra*/ 4: + case /*s0*/ 5: + return 16; + case /*s1*/ 6: + case /*s2*/ 7: + return 32; + case /*s3*/ 8: + case /*s4*/ 9: + return 48; + case /*s5*/ 10: + case /*s6*/ 11: + return 64; + case /*s7*/ 12: + case /*s8*/ 13: + return 80; + case /*s9*/ 14: + return 96; + case /*s11*/ 15: + return 112; + } + } + llvm_unreachable("Unexpected rlistVal"); +} + +inline static bool getSpimm(unsigned rlistVal, unsigned &spimmVal, + int64_t stackAdjustment, bool isRV64, bool isEABI) { + if (rlistVal == 16) + return false; + unsigned stackAdj = getStackAdjBase(rlistVal, isRV64, isEABI); + spimmVal = (stackAdjustment - stackAdj) / 16; + if (spimmVal > 3) + return false; + return true; +} + +void printRlist(unsigned RlistEncode, raw_ostream &OS); +void printSpimm(int64_t Spimm, raw_ostream &OS); + +} // namespace RISCVZCE + } // namespace llvm #endif
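A worked example of the stack-adjustment helpers above (a sketch using the patch's own semantics; values follow the RV32 non-EABI table):

// rlistVal 7 = {ra, s0-s2}: the RV32 base adjustment is 16 bytes, and spimm
// adds 0-3 extra 16-byte blocks, so total adjustment = base + spimm * 16.
unsigned Spimm = 0;
bool Ok = RISCVZCE::getSpimm(/*rlistVal=*/7, Spimm,
                             /*stackAdjustment=*/48,
                             /*isRV64=*/false, /*isEABI=*/false);
// Ok == true and Spimm == 2, since 48 == 16 + 2 * 16.
// A request of 96 would fail: (96 - 16) / 16 == 5, which exceeds 3.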
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -182,4 +182,51 @@ OS << ", mu"; } +void RISCVZCE::printRlist(unsigned RlistEncode, raw_ostream &OS) { + OS << "{"; + switch (RlistEncode) { + case 4: + OS << "ra"; + break; + case 5: + OS << "ra, s0"; + break; + case 6: + OS << "ra, s0-s1"; + break; + case 7: + OS << "ra, s0-s2"; + break; + case 8: + OS << "ra, s0-s3"; + break; + case 9: + OS << "ra, s0-s4"; + break; + case 10: + OS << "ra, s0-s5"; + break; + case 11: + OS << "ra, s0-s6"; + break; + case 12: + OS << "ra, s0-s7"; + break; + case 13: + OS << "ra, s0-s8"; + break; + case 14: + OS << "ra, s0-s9"; + break; + case 15: + OS << "ra, s0-s11"; + break; + } + OS << "}"; +} + +void RISCVZCE::printSpimm(int64_t Spimm, raw_ostream &OS) { + OS << Spimm; +} + } // namespace llvm diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h @@ -46,6 +46,10 @@ raw_ostream &O); void printVMaskReg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printRlist(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSpimm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); // Autogenerated by tblgen. std::pair getMnemonic(const MCInst *MI) override; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -181,6 +181,33 @@ RISCVVType::printVType(Imm, O); } +void RISCVInstPrinter::printRlist(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNo).getImm(); + RISCVZCE::printRlist(Imm, O); +} + +void RISCVInstPrinter::printSpimm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { + int64_t Imm = MI->getOperand(OpNo).getImm(); + unsigned Opcode = MI->getOpcode(); + bool IsRV64 = STI.getFeatureBits()[RISCV::Feature64Bit]; + bool IsEABI = false; // Reserved for future implementation + auto RlistVal = MI->getOperand(0).getImm(); + assert(RlistVal != 16 && "Incorrect rlist."); + auto Base = RISCVZCE::getStackAdjBase(RlistVal, IsRV64, IsEABI); + int64_t Spimm = Imm + Base; + if (Spimm < Base || Spimm > Base + 48) + llvm_unreachable("Incorrect spimm"); + if (Opcode == RISCV::CM_PUSH) + Spimm *= -1; + + RISCVZCE::printSpimm(Spimm, O); +} + void RISCVInstPrinter::printVMaskReg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -84,6 +84,10 @@ unsigned getVMaskReg(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + + unsigned getRlistOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; private: FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; @@ -403,5 +407,17 @@ } } +unsigned
RISCVMCCodeEmitter::getRlistOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + MCOperand MO = MI.getOperand(OpNo); + assert(MO.isImm() && "Rlist operand must be immediate"); + auto Imm = MO.getImm(); + assert(Imm >= 4 && "EABI is currently not implemented"); + return Imm; +} + #define ENABLE_INSTR_PREDICATE_VERIFIER #include "RISCVGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp @@ -19,5 +19,6 @@ unsigned RISCVMCObjectFileInfo::getTextSectionAlignment() const { const MCSubtargetInfo *STI = getContext().getSubtargetInfo(); - return STI->hasFeature(RISCV::FeatureStdExtC) ? 2 : 4; + return (STI->hasFeature(RISCV::FeatureStdExtC) || + STI->hasFeature(RISCV::FeatureExtZca)) ? 2 : 4; } diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -62,6 +62,12 @@ FunctionPass *createRISCVRedundantCopyEliminationPass(); void initializeRISCVRedundantCopyEliminationPass(PassRegistry &); +FunctionPass *createRISCVMoveOptimizationPass(); +void initializeRISCVMoveOptPass(PassRegistry &); + +FunctionPass *createRISCVPushPopOptimizationPass(); +void initializeRISCVPushPopOptPass(PassRegistry &); + InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, RISCVSubtarget &, RISCVRegisterBankInfo &); diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -327,6 +327,35 @@ FeatureStdExtZkr, FeatureStdExtZkt]>; +def FeatureExtZca + : SubtargetFeature<"experimental-zca", "HasStdExtZca", "true", + "'Zca' (part of the C extension, excluding all 16-bit floating point loads and stores)">; +def HasStdExtZca : Predicate<"Subtarget->hasStdExtZca()">, + AssemblerPredicate<(all_of FeatureExtZca), + "'Zca' (part of the C extension)">; + +def FeatureExtZcb + : SubtargetFeature<"experimental-zcb", "HasStdExtZcb", "true", + "'Zcb' (Shortened format for basic bit manipulation instructions)", + [FeatureExtZca]>; +def HasStdExtZcb : Predicate<"Subtarget->hasStdExtZcb()">, + AssemblerPredicate<(all_of FeatureExtZcb), + "'Zcb' (Shortened format for basic bit manipulation instructions)">; + +def FeatureExtZcmp + : SubtargetFeature<"experimental-zcmp", "HasStdExtZcmp", "true", + "'Zcmp' (sequenced instructions for code-size reduction)", + [FeatureExtZca]>; +def HasStdExtZcmp : Predicate<"Subtarget->hasStdExtZcmp() && !Subtarget->hasStdExtC()">, + AssemblerPredicate<(all_of FeatureExtZcmp, (not FeatureStdExtC)), + "'Zcmp' (sequenced instructions for code-size reduction)">; + +def HasStdExtCOrZca + : Predicate<"Subtarget->hasStdExtC() || Subtarget->hasStdExtZca()">, + AssemblerPredicate<(any_of FeatureStdExtC, FeatureExtZca), + "'C' (Compressed Instructions) or " + "'Zca' (part of the C extension)">; + def FeatureNoRVCHints : SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false", "Disable RVC Hint Instructions.">;
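For reviewers, typical assembler invocations to exercise these features (illustrative RUN lines in the style of the LLVM MC tests, not taken from this patch):

// RUN: llvm-mc -triple riscv32 -mattr=+experimental-zca  zca.s
// RUN: llvm-mc -triple riscv32 -mattr=+experimental-zcmp zcmp.s
// Note: HasStdExtZcmp requires the absence of FeatureStdExtC, so passing
// both +c and +experimental-zcmp leaves the cm.* instructions unavailable.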
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -45,6 +45,8 @@ bool hasBP(const MachineFunction &MF) const; + bool isCSIpushable(const std::vector<CalleeSavedInfo> &CSI) const; + bool hasReservedCallFrame(const MachineFunction &MF) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -222,6 +222,96 @@ return RestoreLibCalls[LibCallID]; } +// Return encoded value for PUSH/POP instruction, representing +// registers to store/load. +static int getPushPopEncoding(const Register MaxReg) { + switch (MaxReg) { + default: + llvm_unreachable("Something has gone wrong!"); + case /*s11*/ RISCV::X27: + return 15; + case /*s9*/ RISCV::X25: + return 14; + case /*s8*/ RISCV::X24: + return 13; + case /*s7*/ RISCV::X23: + return 12; + case /*s6*/ RISCV::X22: + return 11; + case /*s5*/ RISCV::X21: + return 10; + case /*s4*/ RISCV::X20: + return 9; + case /*s3*/ RISCV::X19: + return 8; + case /*s2*/ RISCV::X18: + return 7; + case /*s1*/ RISCV::X9: + return 6; + case /*s0*/ RISCV::X8: + return 5; + case /*ra*/ RISCV::X1: + return 4; + } +} + +static void reallocPushStackFrame(MachineFunction &MF) { + auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + std::vector<CalleeSavedInfo> CSI = MFI.getCalleeSavedInfo(); + // Reallocate the stack frame for PUSH. + int64_t NonPushStackOffset = -RVFI->getRVPushStackSize(); + for (const auto &Entry : CSI) { + int FrameIdx = Entry.getFrameIdx(); + Register Reg = Entry.getReg(); + if (!(Reg == RISCV::X26 || RISCV::PGPRRegClass.contains(Reg))) { + NonPushStackOffset -= MFI.getObjectSize(FrameIdx); + MFI.setObjectOffset(FrameIdx, NonPushStackOffset); + } + } +} + +static uint64_t adjSPInPushPop(MachineBasicBlock::iterator MBBI, uint64_t StackAdj, bool IsPop) { + // The spec allocates 2 bits to specify the number of extra 16-byte blocks. + uint32_t AvailableAdj = 48; + uint64_t RequiredAdj = StackAdj; + + // Use available stack adjustment in Zce PUSH/POP instruction + // to allocate/deallocate space on stack. + int OpNum = MBBI->getNumOperands(); + auto &Operand = MBBI->getOperand(OpNum - 1); + int RegisterOffset = Operand.getImm(); + RequiredAdj -= RegisterOffset; + + if (RequiredAdj >= AvailableAdj) { + RequiredAdj -= AvailableAdj; + StackAdj = AvailableAdj; + } else { + // Round up to the nearest 16-byte block that fits RequiredAdj. + StackAdj = alignTo(RequiredAdj, 16); + RequiredAdj = 0; + } + Operand.setImm(StackAdj); + MBBI->setFlag(IsPop ? MachineInstr::FrameDestroy : MachineInstr::FrameSetup); + return RequiredAdj; +} + +// Checks if Zce PUSH/POP instructions can be used with the given CSI. +bool RISCVFrameLowering::isCSIpushable( + const std::vector<CalleeSavedInfo> &CSI) const { + if (!STI.hasStdExtZcmp() || CSI.empty()) + return false; + for (auto &CS : CSI) { + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = STI.getRegisterInfo()->getMinimalPhysRegClass(Reg); + if (RISCV::PGPRRegClass.hasSubClassEq(RC)) + return true; + } + return false; +} + // Return true if the specified function should have a dedicated frame // pointer register. This is true if frame pointer elimination is // disabled, if it needs dynamic stack realignment, if the function has @@ -350,11 +440,11 @@ // Returns the register used to hold the stack pointer.
static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; } -static SmallVector<CalleeSavedInfo, 8> +static std::vector<CalleeSavedInfo> getNonLibcallCSI(const MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI) { const MachineFrameInfo &MFI = MF.getFrameInfo(); - SmallVector<CalleeSavedInfo, 8> NonLibcallCSI; + std::vector<CalleeSavedInfo> NonLibcallCSI; for (auto &CS : CSI) { int FI = CS.getFrameIdx(); @@ -379,6 +469,7 @@ Amount = -Amount; Opc = RISCV::SUB; } + // Multiply the number of v-slots by the register length. Register FactorRegister = TII->getVLENFactoredAmount(MF, MBB, MBBI, DL, Amount, Flag); @@ -469,8 +560,31 @@ RealStackSize = FirstSPAdjustAmount; } - // Allocate space on the stack if necessary. - adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); + const auto &CSI = MFI.getCalleeSavedInfo(); + bool PushEnabled = isCSIpushable(CSI); + if (PushEnabled && !CSI.empty()) { + // Check at what offset spilling of registers starts and allocate space before it. + int64_t PreAdjustStack = 0; + for (auto CS : CSI) { + PreAdjustStack = std::min(PreAdjustStack, -(MFI.getObjectOffset(CS.getFrameIdx()) + + MFI.getObjectSize(CS.getFrameIdx()))); + } + if (PreAdjustStack != 0) + adjustReg(MBB, MBBI, DL, SPReg, SPReg, -PreAdjustStack, MachineInstr::FrameSetup); + StackSize -= PreAdjustStack; + + // Use available stack adjustment in push instruction to allocate additional stack space. + StackSize = adjSPInPushPop(MBBI, StackSize, false); + if (StackSize != 0) { + adjustReg(MBB, next_nodbg(MBBI, MBB.end()), DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); + MBBI = next_nodbg(MBBI, MBB.end()); + reallocPushStackFrame(MF); + } + } else { + // Allocate space on the stack if necessary. + adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); + } // Emit ".cfi_def_cfa_offset RealStackSize" unsigned CFIIndex = MF.addFrameInst( @@ -479,15 +593,16 @@ .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); - const auto &CSI = MFI.getCalleeSavedInfo(); - - // The frame pointer is callee-saved, and code has been generated for us to - // save it to the stack. We need to skip over the storing of callee-saved - // registers as the frame pointer must be modified after it has been saved - // to the stack, not before. - // FIXME: assumes exactly one instruction is used to save each callee-saved - // register. - std::advance(MBBI, getNonLibcallCSI(MF, CSI).size()); + if (PushEnabled) + std::advance(MBBI, 1); + else + // The frame pointer is callee-saved, and code has been generated for us to + // save it to the stack. We need to skip over the storing of callee-saved + // registers as the frame pointer must be modified after it has been saved + // to the stack, not before. + // FIXME: assumes exactly one instruction is used to save each callee-saved + // register. + std::advance(MBBI, getNonLibcallCSI(MF, CSI).size()); // Iterate over list of callee-saved registers and emit .cfi_offset // directives.
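To illustrate the prologue change (a hand-written sketch, not actual compiler output): for an RV32 function saving ra, s0, s1 and needing 64 bytes of locals, the CSR stores and the first 48 bytes of extra adjustment fold into cm.push:

//   without Zcmp:                 with Zcmp:
//     addi sp, sp, -80              cm.push {ra, s0-s1}, -64
//     sw   ra, 76(sp)               addi    sp, sp, -16
//     sw   s0, 72(sp)
//     sw   s1, 68(sp)
// {ra, s0-s1} has base adjustment 16, so cm.push covers at most
// 16 + 48 = 64 bytes; the remaining 16 bytes still need a separate addi.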
@@ -628,7 +743,10 @@ // FIXME: assumes exactly one instruction is used to restore each // callee-saved register. auto LastFrameDestroy = MBBI; - if (!CSI.empty()) + bool PopEnabled = isCSIpushable(CSI); + if (PopEnabled) + LastFrameDestroy = prev_nodbg(MBBI, MBB.begin()); + else if (!CSI.empty()) LastFrameDestroy = std::prev(MBBI, CSI.size()); uint64_t StackSize = getStackSizeWithRVVPadding(MF); @@ -672,7 +790,27 @@ StackSize = FirstSPAdjustAmount; // Deallocate stack - adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); + if (PopEnabled) { + // Check at what offset spilling of registers starts and calculate space before it. + int64_t PreAdjustSize = 0; + for (auto CS : CSI) { + PreAdjustSize = std::min(PreAdjustSize, -(MFI.getObjectOffset(CS.getFrameIdx()) + + MFI.getObjectSize(CS.getFrameIdx()))); + } + adjustReg(MBB, MBBI, DL, SPReg, SPReg, PreAdjustSize, MachineInstr::FrameDestroy); + StackSize -= PreAdjustSize; + if (PreAdjustSize != 0) + MBBI = prev_nodbg(MBBI, MBB.begin()); + + // Use available stack adjustment in pop instruction to deallocate stack space. + StackSize = adjSPInPushPop(prev_nodbg(MBBI, MBB.begin()), StackSize, true); + if (StackSize != 0) { + adjustReg(MBB, prev_nodbg(MBBI, MBB.begin()), DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); + } + } else + adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); // Emit epilogue for shadow call stack. emitSCSEpilogue(MF, MBB, MBBI, DL); @@ -1148,31 +1286,74 @@ if (MI != MBB.end() && !MI->isDebugInstr()) DL = MI->getDebugLoc(); - const char *SpillLibCall = getSpillLibCallName(*MF, CSI); - if (SpillLibCall) { - // Add spill libcall via non-callee-saved register t0. - BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5) - .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL) - .setMIFlag(MachineInstr::FrameSetup); - - // Add registers spilled in libcall as liveins. - for (auto &CS : CSI) - MBB.addLiveIn(CS.getReg()); + // Emit CM.PUSH with base spimm and evaluate the push stack size. + if (isCSIpushable(CSI.vec())) { + auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>(); + uint64_t PushStackSize = 0; + std::vector<CalleeSavedInfo> NonPushCSI; + Register MaxReg = RISCV::NoRegister; + + for (auto &CS : CSI) { + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + if (RISCV::PGPRRegClass.hasSubClassEq(RC)) { + if (Reg != RISCV::X27) + PushStackSize += 4; + MaxReg = std::max(MaxReg.id(), Reg.id()); + } else if (Reg.id() == RISCV::X26) { + PushStackSize += 8; + MaxReg = RISCV::X27; + } else + NonPushCSI.push_back(CS); + } + RVFI->setRVPushStackSize(PushStackSize); + + MachineInstrBuilder PushBuilder = + BuildMI(MBB, MI, DL, TII.get(RISCV::CM_PUSH)); + // Use encoded number to represent registers to spill. + int RegEnc = getPushPopEncoding(MaxReg); + PushBuilder.addImm(RegEnc); + // Calculate SpImm Base adjustment, and SpImm field will be updated + // through adjSPInPushPop. + bool IsRV64 = STI.getFeatureBits()[RISCV::Feature64Bit]; + bool IsEABI = false; // Reserved for future implementation + uint32_t SpImmBase = RISCVZCE::getStackAdjBase(RegEnc, IsRV64, IsEABI); + PushBuilder.addImm(SpImmBase); + + for (auto &CS : NonPushCSI) { + Register Reg = CS.getReg(); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), + TRI->getMinimalPhysRegClass(Reg), TRI); + } } + else { + const char *SpillLibCall = getSpillLibCallName(*MF, CSI); + if (SpillLibCall) { + // Add spill libcall via non-callee-saved register t0.
+ BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5) + .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL) + .setMIFlag(MachineInstr::FrameSetup); + + // Add registers spilled in libcall as liveins. + for (auto &CS : CSI) + MBB.addLiveIn(CS.getReg()); + } - // Manually spill values not spilled by libcall. - const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); - for (auto &CS : NonLibcallCSI) { - // Insert the spill to the stack frame. - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(), + // Manually spill values not spilled by libcall. + const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); + for (auto &CS : NonLibcallCSI) { + // Insert the spill to the stack frame. + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(), RC, TRI); + } } return true; } - bool RISCVFrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { @@ -1185,36 +1366,64 @@ if (MI != MBB.end() && !MI->isDebugInstr()) DL = MI->getDebugLoc(); - // Manually restore values not restored by libcall. - // Keep the same order as in the prologue. There is no need to reverse the - // order in the epilogue. In addition, the return address will be restored - // first in the epilogue. It increases the opportunity to avoid the - // load-to-use data hazard between loading RA and return by RA. - // loadRegFromStackSlot can insert multiple instructions. - const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); - for (auto &CS : NonLibcallCSI) { - Register Reg = CS.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI); - assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); - } + if (isCSIpushable(CSI.vec())) { + Register MaxReg = RISCV::NoRegister; - const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI); - if (RestoreLibCall) { - // Add restore libcall via tail call. - MachineBasicBlock::iterator NewMI = - BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL)) - .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL) - .setMIFlag(MachineInstr::FrameDestroy); - - // Remove trailing returns, since the terminator is now a tail call to the - // restore function. - if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) { - NewMI->copyImplicitOps(*MF, *MI); - MI->eraseFromParent(); + for (auto &CS : reverse(CSI)) { + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + if (RISCV::PGPRRegClass.hasSubClassEq(RC)) + MaxReg = std::max(MaxReg.id(), Reg.id()); + else if (Reg.id() == RISCV::X26) { + MaxReg = RISCV::X27; + } else + TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI); } + + MachineInstrBuilder PopBuilder = + BuildMI(MBB, MI, DL, TII.get(RISCV::CM_POP)); + // Use encoded number to represent registers to restore. + int RegEnc = getPushPopEncoding(MaxReg); + PopBuilder.addImm(RegEnc); + // Calculate SpImm Base adjustment, and SpImm field will be updated + // through adjSPInPushPop.
+ bool IsRV64 = STI.getFeatureBits()[RISCV::Feature64Bit]; + bool IsEABI = false; // Reserved for future implementation + uint32_t SpImmBase = RISCVZCE::getStackAdjBase(RegEnc, IsRV64, IsEABI); + PopBuilder.addImm(SpImmBase); } + else { + // Manually restore values not restored by libcall. + // Keep the same order as in the prologue. There is no need to reverse the + // order in the epilogue. In addition, the return address will be restored + // first in the epilogue. It increases the opportunity to avoid the + // load-to-use data hazard between loading RA and return by RA. + // loadRegFromStackSlot can insert multiple instructions. + const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); + for (auto &CS : NonLibcallCSI) { + Register Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI); + assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); + } + const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI); + if (RestoreLibCall) { + // Add restore libcall via tail call. + MachineBasicBlock::iterator NewMI = + BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL)) + .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL) + .setMIFlag(MachineInstr::FrameDestroy); + + // Remove trailing returns, since the terminator is now a tail call to the + // restore function. + if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) { + NewMI->copyImplicitOps(*MF, *MI); + MI->eraseFromParent(); + } + } + } return true; }
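The matching epilogue for the prologue sketch above (same caveats: hand-written, illustrative only):

//   without Zcmp:                 with Zcmp:
//     lw   s1, 68(sp)               addi   sp, sp, 16
//     lw   s0, 72(sp)               cm.pop {ra, s0-s1}, 64
//     lw   ra, 76(sp)               ret
//     addi sp, sp, 80
//     ret
// Folding cm.pop + ret into cm.popret {ra, s0-s1}, 64 presumably happens in
// the new RISCVOptimizePushPop pass, whose implementation is not shown here.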
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -933,7 +933,7 @@ } // Function alignments. - const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4); + const Align FunctionAlignment((Subtarget.hasStdExtC() || Subtarget.hasStdExtZca()) ? 2 : 4); setMinFunctionAlignment(FunctionAlignment); setPrefFunctionAlignment(FunctionAlignment); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -104,6 +104,9 @@ bool isAsCheapAsAMove(const MachineInstr &MI) const override; + Optional<DestSourcePair> + isLoadImmImpl(const MachineInstr &MI) const; + Optional<DestSourcePair> isCopyInstrImpl(const MachineInstr &MI) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -58,7 +58,8 @@ STI(STI) {} MCInst RISCVInstrInfo::getNop() const { - if (STI.getFeatureBits()[RISCV::FeatureStdExtC]) + if (STI.getFeatureBits()[RISCV::FeatureStdExtC] || STI.getFeatureBits()[RISCV::FeatureExtZca]) return MCInstBuilder(RISCV::C_NOP); return MCInstBuilder(RISCV::ADDI) .addReg(RISCV::X0) @@ -1017,6 +1018,24 @@ return MI.isAsCheapAsAMove(); } +Optional<DestSourcePair> +RISCVInstrInfo::isLoadImmImpl(const MachineInstr &MI) const { + if (MI.isMoveImmediate()) + return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; + switch (MI.getOpcode()) { + default: + break; + case RISCV::ADDIW: + case RISCV::ADDI: + // Operand 1 can be a frameindex but callers expect registers + if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() && + MI.getOperand(1).getReg() == RISCV::X0) + return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; + break; + } + return None; +} + Optional<DestSourcePair> RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { if (MI.isMoveReg()) @@ -1024,6 +1043,16 @@ switch (MI.getOpcode()) { default: break; + case RISCV::ADD: + if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isReg()) + break; + if ((MI.getOperand(1).getReg() == RISCV::X0) && (MI.getOperand(2).getReg() != RISCV::X0)) + return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; + if ((MI.getOperand(1).getReg() != RISCV::X0) && (MI.getOperand(2).getReg() == RISCV::X0)) + return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; + break; case RISCV::ADDI: // Operand 1 can be a frameindex but callers expect registers if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() && @@ -1243,7 +1272,9 @@ // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
unsigned FrameOverhead = 4; if (RepeatedSequenceLocs[0].getMF()->getSubtarget() - .getFeatureBits()[RISCV::FeatureStdExtC]) + .getFeatureBits()[RISCV::FeatureStdExtC] || + RepeatedSequenceLocs[0].getMF()->getSubtarget() + .getFeatureBits()[RISCV::FeatureExtZca]) FrameOverhead = 2; return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1677,5 +1677,6 @@ include "RISCVInstrInfoZb.td" include "RISCVInstrInfoZk.td" include "RISCVInstrInfoV.td" +include "RISCVInstrInfoZc.td" include "RISCVInstrInfoZfh.td" include "RISCVInstrInfoZicbo.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -281,7 +281,7 @@ // Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtC] in { +let Predicates = [HasStdExtCOrZca] in { let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [X2] in def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd), @@ -321,7 +321,7 @@ let Inst{5} = imm{6}; } -let Predicates = [HasStdExtC, IsRV64] in +let Predicates = [HasStdExtCOrZca, IsRV64] in def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>, Sched<[WriteLDD, ReadMemBase]> { bits<8> imm; @@ -355,7 +355,7 @@ let Inst{5} = imm{6}; } -let Predicates = [HasStdExtC, IsRV64] in +let Predicates = [HasStdExtCOrZca, IsRV64] in def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000>, Sched<[WriteSTD, ReadStoreData, ReadMemBase]> { bits<8> imm; @@ -391,12 +391,12 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1, DecoderNamespace = "RISCV32Only_", Defs = [X1], - Predicates = [HasStdExtC, IsRV32] in + Predicates = [HasStdExtCOrZca, IsRV32] in def C_JAL : RVInst16CJ<0b001, 0b01, (outs), (ins simm12_lsb0:$offset), "c.jal", "$offset">, Sched<[WriteJal]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0, - Predicates = [HasStdExtC, IsRV64] in + Predicates = [HasStdExtCOrZca, IsRV64] in def C_ADDIW : RVInst16CI<0b001, 0b01, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, simm6:$imm), "c.addiw", "$rd, $imm">, @@ -405,7 +405,7 @@ let Inst{6-2} = imm{4-0}; } -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Predicates = [HasStdExtCOrZca] in def C_LI : RVInst16CI<0b010, 0b01, (outs GPRNoX0:$rd), (ins simm6:$imm), "c.li", "$rd, $imm">, Sched<[WriteIALU]> { @@ -458,7 +458,7 @@ def C_AND : CS_ALU<0b100011, 0b11, "c.and", GPRC>, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -let Predicates = [HasStdExtC, IsRV64] in { +let Predicates = [HasStdExtCOrZca, IsRV64] in { def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>, Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>, @@ -506,7 +506,7 @@ let Inst{3-2} = imm{7-6}; } -let Predicates = [HasStdExtC, IsRV64] in +let Predicates = [HasStdExtCOrZca, IsRV64] in def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000>, Sched<[WriteLDD, ReadMemBase]> { let Inst{6-5} = imm{4-3}; @@ -566,7 +566,7 @@ let Inst{8-7} = imm{7-6}; } -let Predicates = [HasStdExtC, IsRV64] in +let Predicates = [HasStdExtCOrZca, IsRV64] in def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000>, Sched<[WriteSTD, ReadStoreData, ReadMemBase]> { let Inst{12-10} = imm{5-3}; @@ -581,13 +581,13 @@ let Inst{15-0} =
0; } -} // Predicates = [HasStdExtC] +} // Predicates = [HasStdExtCOrZca] //===----------------------------------------------------------------------===// // HINT Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0, +let Predicates = [HasStdExtCOrZca, HasRVCHints], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { @@ -688,7 +688,7 @@ let Inst{12} = 0; } -} // Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0, +} // Predicates = [HasStdExtCOrZca, HasRVCHints], hasSideEffects = 0, mayLoad = 0, // mayStore = 0 //===----------------------------------------------------------------------===// @@ -704,7 +704,7 @@ let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in def : InstAlias<"c.flw $rd, (${rs1})", (C_FLW FPR32C:$rd, GPRC:$rs1, 0)>; -let Predicates = [HasStdExtC, IsRV64] in +let Predicates = [HasStdExtCOrZca, IsRV64] in def : InstAlias<"c.ld $rd, (${rs1})", (C_LD GPRC:$rd, GPRC:$rs1, 0)>; let Predicates = [HasStdExtC, HasStdExtD] in @@ -715,7 +715,7 @@ let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in def : InstAlias<"c.fsw $rs2, (${rs1})", (C_FSW FPR32C:$rs2, GPRC:$rs1, 0)>; -let Predicates = [HasStdExtC, IsRV64] in +let Predicates = [HasStdExtCOrZca, IsRV64] in def : InstAlias<"c.sd $rs2, (${rs1})", (C_SD GPRC:$rs2, GPRC:$rs1, 0)>; let Predicates = [HasStdExtC, HasStdExtD] in @@ -726,7 +726,7 @@ let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in def : InstAlias<"c.flwsp $rd, (${rs1})", (C_FLWSP FPR32C:$rd, SP:$rs1, 0)>; -let Predicates = [HasStdExtC, IsRV64] in +let Predicates = [HasStdExtCOrZca, IsRV64] in def : InstAlias<"c.ldsp $rd, (${rs1})", (C_LDSP GPRC:$rd, SP:$rs1, 0)>; let Predicates = [HasStdExtC, HasStdExtD] in @@ -737,7 +737,7 @@ let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in def : InstAlias<"c.fswsp $rs2, (${rs1})", (C_FSWSP FPR32C:$rs2, SP:$rs1, 0)>; -let Predicates = [HasStdExtC, IsRV64] in +let Predicates = [HasStdExtCOrZca, IsRV64] in def : InstAlias<"c.sdsp $rs2, (${rs1})", (C_SDSP GPRC:$rs2, SP:$rs1, 0)>; } @@ -749,69 +749,69 @@ // on page 82 of the ISA manual. 
// Quadrant 0 -let Predicates = [HasStdExtC] in { +let Predicates = [HasStdExtCOrZca] in { def : CompressPat<(ADDI GPRC:$rd, SP:$rs1, uimm10_lsb00nonzero:$imm), (C_ADDI4SPN GPRC:$rd, SP:$rs1, uimm10_lsb00nonzero:$imm)>; -} // Predicates = [HasStdExtC] +} // Predicates = [HasStdExtCOrZca] let Predicates = [HasStdExtC, HasStdExtD] in { def : CompressPat<(FLD FPR64C:$rd, GPRC:$rs1, uimm8_lsb000:$imm), (C_FLD FPR64C:$rd, GPRC:$rs1, uimm8_lsb000:$imm)>; } // Predicates = [HasStdExtC, HasStdExtD] -let Predicates = [HasStdExtC] in { +let Predicates = [HasStdExtCOrZca] in { def : CompressPat<(LW GPRC:$rd, GPRC:$rs1, uimm7_lsb00:$imm), (C_LW GPRC:$rd, GPRC:$rs1, uimm7_lsb00:$imm)>; -} // Predicates = [HasStdExtC] +} // Predicates = [HasStdExtCOrZca] let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in { def : CompressPat<(FLW FPR32C:$rd, GPRC:$rs1, uimm7_lsb00:$imm), (C_FLW FPR32C:$rd, GPRC:$rs1, uimm7_lsb00:$imm)>; } // Predicates = [HasStdExtC, HasStdExtF, IsRV32] -let Predicates = [HasStdExtC, IsRV64] in { +let Predicates = [HasStdExtCOrZca, IsRV64] in { def : CompressPat<(LD GPRC:$rd, GPRC:$rs1, uimm8_lsb000:$imm), (C_LD GPRC:$rd, GPRC:$rs1, uimm8_lsb000:$imm)>; -} // Predicates = [HasStdExtC, IsRV64] +} // Predicates = [HasStdExtCOrZca, IsRV64] let Predicates = [HasStdExtC, HasStdExtD] in { def : CompressPat<(FSD FPR64C:$rs2, GPRC:$rs1, uimm8_lsb000:$imm), (C_FSD FPR64C:$rs2, GPRC:$rs1, uimm8_lsb000:$imm)>; } // Predicates = [HasStdExtC, HasStdExtD] -let Predicates = [HasStdExtC] in { +let Predicates = [HasStdExtCOrZca] in { def : CompressPat<(SW GPRC:$rs2, GPRC:$rs1, uimm7_lsb00:$imm), (C_SW GPRC:$rs2, GPRC:$rs1, uimm7_lsb00:$imm)>; -} // Predicates = [HasStdExtC] +} // Predicates = [HasStdExtCOrZca] let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in { def : CompressPat<(FSW FPR32C:$rs2, GPRC:$rs1, uimm7_lsb00:$imm), (C_FSW FPR32C:$rs2, GPRC:$rs1, uimm7_lsb00:$imm)>; } // Predicates = [HasStdExtC, HasStdExtF, IsRV32] -let Predicates = [HasStdExtC, IsRV64] in { +let Predicates = [HasStdExtCOrZca, IsRV64] in { def : CompressPat<(SD GPRC:$rs2, GPRC:$rs1, uimm8_lsb000:$imm), (C_SD GPRC:$rs2, GPRC:$rs1, uimm8_lsb000:$imm)>; -} // Predicates = [HasStdExtC, IsRV64] +} // Predicates = [HasStdExtCOrZca, IsRV64] // Quadrant 1 -let Predicates = [HasStdExtC] in { +let Predicates = [HasStdExtCOrZca] in { def : CompressPat<(ADDI X0, X0, 0), (C_NOP)>; def : CompressPat<(ADDI GPRNoX0:$rs1, GPRNoX0:$rs1, simm6nonzero:$imm), (C_ADDI GPRNoX0:$rs1, simm6nonzero:$imm)>; -} // Predicates = [HasStdExtC] +} // Predicates = [HasStdExtCOrZca] -let Predicates = [HasStdExtC, IsRV32] in { +let Predicates = [HasStdExtCOrZca, IsRV32] in { def : CompressPat<(JAL X1, simm12_lsb0:$offset), (C_JAL simm12_lsb0:$offset)>; -} // Predicates = [HasStdExtC, IsRV32] +} // Predicates = [HasStdExtCOrZca, IsRV32] -let Predicates = [HasStdExtC, IsRV64] in { +let Predicates = [HasStdExtCOrZca, IsRV64] in { def : CompressPat<(ADDIW GPRNoX0:$rs1, GPRNoX0:$rs1, simm6:$imm), (C_ADDIW GPRNoX0:$rs1, simm6:$imm)>; -} // Predicates = [HasStdExtC, IsRV64] +} // Predicates = [HasStdExtCOrZca, IsRV64] -let Predicates = [HasStdExtC] in { +let Predicates = [HasStdExtCOrZca] in { def : CompressPat<(ADDI GPRNoX0:$rd, X0, simm6:$imm), (C_LI GPRNoX0:$rd, simm6:$imm)>; def : CompressPat<(ADDI X2, X2, simm10_lsb0000nonzero:$imm), @@ -841,9 +841,9 @@ let isCompressOnly = true in def : CompressPat<(AND GPRC:$rs1, GPRC:$rs2, GPRC:$rs1), (C_AND GPRC:$rs1, GPRC:$rs2)>; -} // Predicates = [HasStdExtC] +} // Predicates = [HasStdExtCOrZca] -let Predicates = 
[HasStdExtC, IsRV64] in { +let Predicates = [HasStdExtCOrZca, IsRV64] in { let isCompressOnly = true in def : CompressPat<(ADDIW GPRNoX0:$rd, X0, simm6:$imm), (C_LI GPRNoX0:$rd, simm6:$imm)>; @@ -854,44 +854,44 @@ let isCompressOnly = true in def : CompressPat<(ADDW GPRC:$rs1, GPRC:$rs2, GPRC:$rs1), (C_ADDW GPRC:$rs1, GPRC:$rs2)>; -} // Predicates = [HasStdExtC, IsRV64] +} // Predicates = [HasStdExtCOrZca, IsRV64] -let Predicates = [HasStdExtC] in { +let Predicates = [HasStdExtCOrZca] in { def : CompressPat<(JAL X0, simm12_lsb0:$offset), (C_J simm12_lsb0:$offset)>; def : CompressPat<(BEQ GPRC:$rs1, X0, simm9_lsb0:$imm), (C_BEQZ GPRC:$rs1, simm9_lsb0:$imm)>; def : CompressPat<(BNE GPRC:$rs1, X0, simm9_lsb0:$imm), (C_BNEZ GPRC:$rs1, simm9_lsb0:$imm)>; -} // Predicates = [HasStdExtC] +} // Predicates = [HasStdExtCOrZca] // Quadrant 2 -let Predicates = [HasStdExtC] in { +let Predicates = [HasStdExtCOrZca] in { def : CompressPat<(SLLI GPRNoX0:$rs1, GPRNoX0:$rs1, uimmlog2xlennonzero:$imm), (C_SLLI GPRNoX0:$rs1, uimmlog2xlennonzero:$imm)>; -} // Predicates = [HasStdExtC] +} // Predicates = [HasStdExtCOrZca] let Predicates = [HasStdExtC, HasStdExtD] in { def : CompressPat<(FLD FPR64:$rd, SP:$rs1, uimm9_lsb000:$imm), (C_FLDSP FPR64:$rd, SP:$rs1, uimm9_lsb000:$imm)>; } // Predicates = [HasStdExtC, HasStdExtD] -let Predicates = [HasStdExtC] in { +let Predicates = [HasStdExtCOrZca] in { def : CompressPat<(LW GPRNoX0:$rd, SP:$rs1, uimm8_lsb00:$imm), (C_LWSP GPRNoX0:$rd, SP:$rs1, uimm8_lsb00:$imm)>; -} // Predicates = [HasStdExtC] +} // Predicates = [HasStdExtCOrZca] let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in { def : CompressPat<(FLW FPR32:$rd, SP:$rs1, uimm8_lsb00:$imm), (C_FLWSP FPR32:$rd, SP:$rs1, uimm8_lsb00:$imm)>; } // Predicates = [HasStdExtC, HasStdExtF, IsRV32] -let Predicates = [HasStdExtC, IsRV64] in { +let Predicates = [HasStdExtCOrZca, IsRV64] in { def : CompressPat<(LD GPRNoX0:$rd, SP:$rs1, uimm9_lsb000:$imm), (C_LDSP GPRNoX0:$rd, SP:$rs1, uimm9_lsb000:$imm)>; -} // Predicates = [HasStdExtC, IsRV64] +} // Predicates = [HasStdExtCOrZca, IsRV64] -let Predicates = [HasStdExtC] in { +let Predicates = [HasStdExtCOrZca] in { def : CompressPat<(JALR X0, GPRNoX0:$rs1, 0), (C_JR GPRNoX0:$rs1)>; let isCompressOnly = true in { @@ -911,24 +911,24 @@ let isCompressOnly = true in def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, GPRNoX0:$rs1), (C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>; -} // Predicates = [HasStdExtC] +} // Predicates = [HasStdExtCOrZca] let Predicates = [HasStdExtC, HasStdExtD] in { def : CompressPat<(FSD FPR64:$rs2, SP:$rs1, uimm9_lsb000:$imm), (C_FSDSP FPR64:$rs2, SP:$rs1, uimm9_lsb000:$imm)>; } // Predicates = [HasStdExtC, HasStdExtD] -let Predicates = [HasStdExtC] in { +let Predicates = [HasStdExtCOrZca] in { def : CompressPat<(SW GPR:$rs2, SP:$rs1, uimm8_lsb00:$imm), (C_SWSP GPR:$rs2, SP:$rs1, uimm8_lsb00:$imm)>; -} // Predicates = [HasStdExtC] +} // Predicates = [HasStdExtCOrZca] let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in { def : CompressPat<(FSW FPR32:$rs2, SP:$rs1, uimm8_lsb00:$imm), (C_FSWSP FPR32:$rs2, SP:$rs1, uimm8_lsb00:$imm)>; } // Predicates = [HasStdExtC, HasStdExtF, IsRV32] -let Predicates = [HasStdExtC, IsRV64] in { +let Predicates = [HasStdExtCOrZca, IsRV64] in { def : CompressPat<(SD GPR:$rs2, SP:$rs1, uimm9_lsb000:$imm), (C_SDSP GPR:$rs2, SP:$rs1, uimm9_lsb000:$imm)>; -} // Predicates = [HasStdExtC, IsRV64] +} // Predicates = [HasStdExtCOrZca, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td 
new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td @@ -0,0 +1,323 @@ +//===-- RISCVInstrInfoZc.td - RISC-V 'Zc*' instructions ---*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file describes the RISC-V instructions from the 'Zce' Code-size reduction +/// extension, version 0.70. +/// This version is still experimental as the 'Zce' extension hasn't been +/// ratified yet. +/// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions. +//===----------------------------------------------------------------------===// + +def uimm2_zce : Operand<XLenVT>, + ImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<2>; + let DecoderMethod = "decodeUImmOperand<2>"; + let OperandType = "OPERAND_UIMM2"; + let OperandNamespace = "RISCVOp"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isUInt<2>(Imm); + }]; +} + +def uimm2_lsb0 : Operand<XLenVT>, + ImmLeaf<XLenVT, [{return isShiftedUInt<1, 1>(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<2, "Lsb0">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<2>"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<1, 1>(Imm); + }]; +} + +def RlistAsmOperand : AsmOperandClass { + let Name = "Rlist"; + let ParserMethod = "parseReglist"; + let DiagnosticType = "InvalidRlist"; +} + +def SpimmAsmOperand : AsmOperandClass { + let Name = "Spimm"; + let ParserMethod = "parseZceSpimm"; + let DiagnosticType = "InvalidSpimm"; +} + +def rlist : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<4>(Imm);}]> { + let ParserMatchClass = RlistAsmOperand; + let PrintMethod = "printRlist"; + let DecoderMethod = "decodeZceRlist"; + let EncoderMethod = "getRlistOpValue"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + if (!isUInt<4>(Imm)) return false; + // 0~3 Reserved for EABI + return (Imm >= 4) && (Imm <= 15); + }]; + } + +def spimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isShiftedUInt<5, 4>(Imm);}]> { + let ParserMatchClass = SpimmAsmOperand; + let PrintMethod = "printSpimm"; + let DecoderMethod = "decodeZceSpimm"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<5, 4>(Imm); + }]; +} + +//===----------------------------------------------------------------------===// +// Instruction Class Templates +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVZceArith_r<bits<2> funct2, bits<3> opcode, string opcodestr> + : RVInst16<(outs GPRC:$rs_wb), (ins GPRC:$rs), opcodestr, "$rs", [], InstFormatCB> { + bits<3> rs; + let Constraints = "$rs = $rs_wb"; + + let Inst{15-13} = 0b100; + let Inst{12-10} = 0b111; + let Inst{9-7} = rs; + let Inst{6-5} = funct2; + let Inst{4-2} = opcode; + let Inst{1-0} = 0b01; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVZceArith_rr<bits<6> funct6, bits<2> funct2, bits<2> opcode, string opcodestr> + : RVInst16<(outs GPRC:$rs1_wb), (ins GPRC:$rs1, GPRC:$rs2), opcodestr, "$rs1, $rs2", [], InstFormatCB> { + bits<3> rs1; + bits<3>
rs2; + let Constraints = "$rs1 = $rs1_wb"; + + let Inst{15-10} = funct6; + let Inst{9-7} = rs1; + let Inst{6-5} = funct2; + let Inst{4-2} = rs2; + let Inst{1-0} = opcode; +} + +class RVInstZceCPPP + : RVInst16 { + + let Inst{1-0} = 0b10; + let Inst{15-13} = 0b101; +} + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +class ZceLoad_ri funct3, bits<2> opcode, string opcodestr, + RegisterClass cls, DAGOperand opnd> + : RVInst16CL; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +class ZceStore_ri funct3, bits<2> opcode, string opcodestr, + RegisterClass cls, DAGOperand opnd> + : RVInst16CS; + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +// ZCB + +let Predicates = [HasStdExtZcb, IsRV64] in { +def C_ZEXT_W : RVZceArith_r<0b11, 0b100 , "c.zext.w">, Sched<[]>; +} + +let Predicates = [HasStdExtZcb] in { +def C_ZEXT_B : RVZceArith_r<0b11, 0b000 , "c.zext.b">, Sched<[]>; +def C_ZEXT_H : RVZceArith_r<0b11, 0b010 , "c.zext.h">, Sched<[]>; + +def C_SEXT_B : RVZceArith_r<0b11, 0b001 , "c.sext.b">, Sched<[]>; +def C_SEXT_H : RVZceArith_r<0b11, 0b011 , "c.sext.h">, Sched<[]>; + +def C_MUL : RVZceArith_rr<0b100111, 0b10, 0b01, "c.mul">, Sched<[]>; + +def C_NOT_ZCE : RVZceArith_r<0b11, 0b101 , "c.not">, Sched<[]>; + +def C_LBU : ZceLoad_ri<0b100, 0b00, "c.lbu", GPRC, uimm2_zce>, + Sched<[]> { +bits<2> imm; + +let Inst{12-10} = 0b000; +let Inst{6-5} = imm{0,1}; +} + +def C_LHU : ZceLoad_ri<0b100, 0b00, "c.lhu", GPRC, uimm2_lsb0>, + Sched<[]> { +bits<2> imm; + +let Inst{12-10} = 0b001; +let Inst{6} = 0b0; +let Inst{5} = imm{1}; +} + +def C_LH : ZceLoad_ri<0b100, 0b00, "c.lh", GPRC, uimm2_lsb0>, + Sched<[]> { +bits<2> imm; + +let Inst{12-10} = 0b001; +let Inst{6} = 0b1; +let Inst{5} = imm{1}; +} + +def C_SB : ZceStore_ri<0b100, 0b00, "c.sb", GPRC, uimm2_zce>, + Sched<[]> { + bits<2> imm; + + let Inst{12-10} = 0b010; + let Inst{6-5} = imm{0,1}; +} + +def C_SH : ZceStore_ri<0b100, 0b00, "c.sh", GPRC, uimm2_lsb0>, + Sched<[]> { + bits<2> imm; + + let Inst{12-10} = 0b011; + let Inst{6} = 0b1; + let Inst{5} = imm{1}; +} +} + +// ZCMP +let Predicates = [HasStdExtZcmp], Defs = [X10, X11], + hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +def CM_MVA01S : RVInst16CA<0b101011, 0b11, 0b10, (outs), + (ins SR07:$rs1, SR07:$rs2), "cm.mva01s", "$rs1, $rs2">, + Sched<[]>; + +def CM_MVSA01 : RVInst16CA<0b101011, 0b01, 0b10, (outs SR07:$rs1, SR07:$rs2), + (ins), "cm.mvsa01", "$rs1, $rs2">, + Sched<[]>; +} + +let Predicates = [HasStdExtZcmp] in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +def CM_PUSH : RVInstZceCPPP<(outs), (ins rlist:$rlist, spimm:$spimm), + "cm.push", "{$rlist}, $spimm">, Sched<[]> { + bits<4> rlist; + bits<16> spimm; + + let Inst{12-8} = 0b11000; + let Inst{7-4} = rlist; + let Inst{3-2} = spimm{5-4}; +} + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +def CM_POPRET : RVInstZceCPPP<(outs), (ins rlist:$rlist, spimm:$spimm), + "cm.popret", "{$rlist}, $spimm">, Sched<[]> { + bits<4> rlist; + bits<16> spimm; + + let Inst{12-8} = 0b11110; + let Inst{7-4} = rlist; + let Inst{3-2} = spimm{5-4}; +} + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +def CM_POPRETZ : RVInstZceCPPP<(outs), (ins rlist:$rlist, spimm:$spimm), + "cm.popretz", "{$rlist}, $spimm">, Sched<[]> { + bits<4> rlist; + bits<16> spimm; + + let Inst{12-8} = 0b11100; + let Inst{7-4} = rlist; + let Inst{3-2} = spimm{5-4}; +} + +let hasSideEffects = 0, mayLoad = 1, mayStore = 
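+
+// All cm.push/cm.pop-family instructions take a register list (rlist) and a
+// stack adjustment (spimm); spimm{5-4} encodes the extra stack space
+// requested on top of the minimum implied by the register list, in
+// multiples of 16 bytes.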
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def CM_POP : RVInstZceCPPP<(outs), (ins rlist:$rlist, spimm:$spimm),
+                           "cm.pop", "{$rlist}, $spimm">, Sched<[]> {
+  bits<4> rlist;
+  bits<16> spimm;
+
+  let Inst{12-8} = 0b11010;
+  let Inst{7-4} = rlist;
+  let Inst{3-2} = spimm{5-4};
+}
+} // Predicates = [HasStdExtZcmp]
+
+let isCompressOnly = true in {
+
+let Predicates = [HasStdExtZcb] in {
+def : CompressPat<(MUL GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
+                  (C_MUL GPRC:$rs1, GPRC:$rs2)>;
+
+def : CompressPat<(MUL GPRC:$rs1, GPRC:$rs2, GPRC:$rs1),
+                  (C_MUL GPRC:$rs1, GPRC:$rs2)>;
+} // Predicates = [HasStdExtZcb]
+
+let Predicates = [HasStdExtZcb, HasStdExtZbb] in {
+def : CompressPat<(SEXT_B GPRC:$rs1, GPRC:$rs1),
+                  (C_SEXT_B GPRC:$rs1, GPRC:$rs1)>;
+
+def : CompressPat<(SEXT_H GPRC:$rs1, GPRC:$rs1),
+                  (C_SEXT_H GPRC:$rs1, GPRC:$rs1)>;
+} // Predicates = [HasStdExtZcb, HasStdExtZbb]
+
+let Predicates = [HasStdExtZcb, HasStdExtZbb] in {
+def : CompressPat<(ZEXT_H_RV32 GPRC:$rs1, GPRC:$rs1),
+                  (C_ZEXT_H GPRC:$rs1, GPRC:$rs1)>;
+
+def : CompressPat<(ZEXT_H_RV64 GPRC:$rs1, GPRC:$rs1),
+                  (C_ZEXT_H GPRC:$rs1, GPRC:$rs1)>;
+} // Predicates = [HasStdExtZcb, HasStdExtZbb]
+
+// zext.b
+let Predicates = [HasStdExtZcb] in {
+def : CompressPat<(ANDI GPRC:$rs1, GPRC:$rs1, 255),
+                  (C_ZEXT_B GPRC:$rs1, GPRC:$rs1)>;
+} // Predicates = [HasStdExtZcb]
+
+// zext.w
+let Predicates = [HasStdExtZcb, HasStdExtZba, IsRV64] in {
+def : CompressPat<(ADD_UW GPRC:$rs1, GPRC:$rs1, X0),
+                  (C_ZEXT_W GPRC:$rs1, GPRC:$rs1)>;
+} // Predicates = [HasStdExtZcb, HasStdExtZba, IsRV64]
+
+// not
+let Predicates = [HasStdExtZcb] in {
+def : CompressPat<(XORI GPRC:$rs1, GPRC:$rs1, -1),
+                  (C_NOT_ZCE GPRC:$rs1, GPRC:$rs1)>;
+} // Predicates = [HasStdExtZcb]
+
+let Predicates = [HasStdExtZcb] in {
+// c.lbu, c.lhu
+def : CompressPat<(LBU GPRC:$rd, GPRC:$rs1, uimm2_zce:$imm),
+                  (C_LBU GPRC:$rd, GPRC:$rs1, uimm2_zce:$imm)>;
+
+def : CompressPat<(LHU GPRC:$rd, GPRC:$rs1, uimm2_lsb0:$imm),
+                  (C_LHU GPRC:$rd, GPRC:$rs1, uimm2_lsb0:$imm)>;
+
+// c.lh
+def : CompressPat<(LH GPRC:$rd, GPRC:$rs1, uimm2_lsb0:$imm),
+                  (C_LH GPRC:$rd, GPRC:$rs1, uimm2_lsb0:$imm)>;
+
+// c.sb, c.sh
+def : CompressPat<(SB GPRC:$rs2, GPRC:$rs1, uimm2_zce:$imm),
+                  (C_SB GPRC:$rs2, GPRC:$rs1, uimm2_zce:$imm)>;
+
+def : CompressPat<(SH GPRC:$rs2, GPRC:$rs1, uimm2_lsb0:$imm),
+                  (C_SH GPRC:$rs2, GPRC:$rs1, uimm2_lsb0:$imm)>;
+} // Predicates = [HasStdExtZcb]
+
+} // isCompressOnly = true
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -63,6 +63,8 @@
   uint64_t RVVPadding = 0;
   /// Size of stack frame to save callee saved registers
   unsigned CalleeSavedStackSize = 0;
+  /// Size of stack frame for Zcmp PUSH/POP
+  unsigned RVPushStackSize = 0;

 public:
   RISCVMachineFunctionInfo(const MachineFunction &MF) {}
@@ -93,7 +95,8 @@
     // function uses a varargs save area, or is an interrupt handler.
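+    // Zcmp cm.push/cm.pop provide their own compact save/restore sequences,
+    // so the save/restore libcalls are not used when Zcmp is available.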
     return MF.getSubtarget<RISCVSubtarget>().enableSaveRestore() &&
            VarArgsSaveSize == 0 && !MF.getFrameInfo().hasTailCall() &&
-           !MF.getFunction().hasFnAttribute("interrupt");
+           !MF.getFunction().hasFnAttribute("interrupt") &&
+           !MF.getSubtarget<RISCVSubtarget>().hasStdExtZcmp();
   }

   uint64_t getRVVStackSize() const { return RVVStackSize; }
@@ -108,6 +111,9 @@
   unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
   void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }

+  uint64_t getRVPushStackSize() const { return RVPushStackSize; }
+  void setRVPushStackSize(uint64_t Size) { RVPushStackSize = Size; }
+
   void initializeBaseYamlFields(const yaml::RISCVMachineFunctionInfo &YamlMFI);
 };

diff --git a/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMoveOptimizer.cpp
@@ -0,0 +1,248 @@
+//===- RISCVMoveOptimizer.cpp - RISC-V move optimization pass ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs move-related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define RISCV_MOVE_OPT_NAME "RISC-V Zce move merging pass"
+
+namespace {
+struct RISCVMoveOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVMoveOpt() : MachineFunctionPass(ID) {
+    initializeRISCVMoveOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  const RISCVInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const RISCVSubtarget *Subtarget;
+
+  // Track which register units have been modified and used.
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+  bool isCandidateToMergeMVA01S(DestSourcePair &RegPair);
+  bool isCandidateToMergeMVSA01(DestSourcePair &RegPair);
+  // Merge the two instructions indicated into a single pair instruction.
+  MachineBasicBlock::iterator
+  mergePairedInsns(MachineBasicBlock::iterator I,
+                   MachineBasicBlock::iterator Paired, unsigned Opcode);
+
+  // Look for a C.MV instruction that can be combined with the given
+  // instruction into CM.MVA01S or CM.MVSA01. Return the matching instruction
+  // if one exists.
+  MachineBasicBlock::iterator
+  findMatchingInst(MachineBasicBlock::iterator &MBBI, unsigned InstOpcode);
+  bool MovOpt(MachineBasicBlock &MBB);
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  StringRef getPassName() const override { return RISCV_MOVE_OPT_NAME; }
+};
+
+char RISCVMoveOpt::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVMoveOpt, "riscv-mov-opt", RISCV_MOVE_OPT_NAME, false,
+                false)
+
+// Check if registers meet CM.MVA01S constraints.
+bool RISCVMoveOpt::isCandidateToMergeMVA01S(DestSourcePair &RegPair) {
+  Register Destination = RegPair.Destination->getReg();
+  Register Source = RegPair.Source->getReg();
+  const TargetRegisterClass *SourceRC = TRI->getMinimalPhysRegClass(Source);
+  // The destination must be a0 or a1, and the source must be in s0-s7.
+  if (Destination == RISCV::X10 || Destination == RISCV::X11)
+    if (RISCV::SR07RegClass.hasSubClassEq(SourceRC))
+      return true;
+  return false;
+}
+
+// Check if registers meet CM.MVSA01 constraints.
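+// cm.mvsa01 copies a0 and a1 into two distinct saved registers (s0-s7),
+// while cm.mva01s performs the reverse move.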
+bool RISCVMoveOpt::isCandidateToMergeMVSA01(DestSourcePair &RegPair) {
+  Register Destination = RegPair.Destination->getReg();
+  Register Source = RegPair.Source->getReg();
+  const TargetRegisterClass *DestinationRC =
+      TRI->getMinimalPhysRegClass(Destination);
+  // The destination must be in s0-s7, and the source must be a0 or a1.
+  if (RISCV::SR07RegClass.hasSubClassEq(DestinationRC))
+    if (Source == RISCV::X10 || Source == RISCV::X11)
+      return true;
+  return false;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+                               MachineBasicBlock::iterator Paired,
+                               unsigned Opcode) {
+  const MachineOperand *Sreg1, *Sreg2;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+  DestSourcePair FirstPair = TII->isCopyInstrImpl(*I).getValue();
+  DestSourcePair PairedRegs = TII->isCopyInstrImpl(*Paired).getValue();
+  Register ARegInFirstPair = Opcode == RISCV::CM_MVA01S
+                                 ? FirstPair.Destination->getReg()
+                                 : FirstPair.Source->getReg();
+
+  if (NextI == Paired)
+    NextI = next_nodbg(NextI, E);
+  DebugLoc DL = I->getDebugLoc();
+
+  // The order of the S-registers depends on which instruction holds a0,
+  // not on the order of the register pairs.
+  // e.g.
+  //   mv a1, s1
+  //   mv a0, s2   =>   cm.mva01s s2, s1
+  //
+  //   mv a0, s2
+  //   mv a1, s1   =>   cm.mva01s s2, s1
+  if (Opcode == RISCV::CM_MVA01S) {
+    Sreg1 = ARegInFirstPair == RISCV::X10 ? FirstPair.Source
+                                          : PairedRegs.Source;
+    Sreg2 = ARegInFirstPair == RISCV::X10 ? PairedRegs.Source
+                                          : FirstPair.Source;
+  } else {
+    Sreg1 = ARegInFirstPair == RISCV::X10 ? FirstPair.Destination
+                                          : PairedRegs.Destination;
+    Sreg2 = ARegInFirstPair == RISCV::X10 ? PairedRegs.Destination
+                                          : FirstPair.Destination;
+  }
+
+  BuildMI(*I->getParent(), I, DL, TII->get(Opcode)).add(*Sreg1).add(*Sreg2);
+
+  I->eraseFromParent();
+  Paired->eraseFromParent();
+  return NextI;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveOpt::findMatchingInst(MachineBasicBlock::iterator &MBBI,
+                               unsigned InstOpcode) {
+  MachineBasicBlock::iterator E = MBBI->getParent()->end();
+  DestSourcePair FirstPair = TII->isCopyInstrImpl(*MBBI).getValue();
+
+  // Track which register units have been modified and used between the first
+  // insn and the second insn.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+
+  for (MachineBasicBlock::iterator I = next_nodbg(MBBI, E); I != E;
+       I = next_nodbg(I, E)) {
+
+    MachineInstr &MI = *I;
+
+    if (auto SecondPair = TII->isCopyInstrImpl(MI)) {
+      Register SourceReg = SecondPair->Source->getReg();
+      Register DestReg = SecondPair->Destination->getReg();
+
+      if (InstOpcode == RISCV::CM_MVA01S &&
+          isCandidateToMergeMVA01S(*SecondPair)) {
+        // The two moves must write different destination registers.
+        if (FirstPair.Destination->getReg() == DestReg)
+          return E;
+
+        // If the paired destination register was modified or used, there is
+        // no possibility of finding a matching instruction, so exit early.
+        if (!ModifiedRegUnits.available(DestReg) ||
+            !UsedRegUnits.available(DestReg))
+          return E;
+
+        // We need to check that the source register of the second paired
+        // instruction is not redefined in between.
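+        // If it was not redefined, the two moves can safely be merged.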
+        if (ModifiedRegUnits.available(SourceReg))
+          return I;
+
+      } else if (InstOpcode == RISCV::CM_MVSA01 &&
+                 isCandidateToMergeMVSA01(*SecondPair)) {
+        if ((FirstPair.Source->getReg() == SourceReg) ||
+            (FirstPair.Destination->getReg() == DestReg))
+          return E;
+
+        if (!ModifiedRegUnits.available(SourceReg) ||
+            !UsedRegUnits.available(SourceReg))
+          return E;
+
+        // For mvsa01, we need to make sure the destination register of the
+        // second paired instruction is not used in between, since we would
+        // move its definition ahead.
+        if (UsedRegUnits.available(DestReg))
+          return I;
+      }
+    }
+    // Update modified / used register units.
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+  }
+  return E;
+}
+
+// Find instructions that can be represented as C.MV instructions and merge
+// them into CM.MVA01S or CM.MVSA01.
+bool RISCVMoveOpt::MovOpt(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+       MBBI != E;) {
+    // Check if the instruction can be compressed to a C.MV instruction. If
+    // it can, return the Dest/Src register pair.
+    auto RegPair = TII->isCopyInstrImpl(*MBBI);
+    if (RegPair.hasValue()) {
+      unsigned Opcode = 0;
+
+      if (isCandidateToMergeMVA01S(*RegPair))
+        Opcode = RISCV::CM_MVA01S;
+      else if (isCandidateToMergeMVSA01(*RegPair))
+        Opcode = RISCV::CM_MVSA01;
+      else {
+        ++MBBI;
+        continue;
+      }
+
+      MachineBasicBlock::iterator Paired = findMatchingInst(MBBI, Opcode);
+      // If a matching instruction was found, merge the pair.
+      if (Paired != E) {
+        MBBI = mergePairedInsns(MBBI, Paired, Opcode);
+        Modified = true;
+        continue;
+      }
+    }
+    ++MBBI;
+  }
+  return Modified;
+}
+
+bool RISCVMoveOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  Subtarget = &static_cast<const RISCVSubtarget &>(Fn.getSubtarget());
+  if (!Subtarget->hasStdExtZcmp()) {
+    return false;
+  }
+
+  TII = static_cast<const RISCVInstrInfo *>(Subtarget->getInstrInfo());
+  TRI = Subtarget->getRegisterInfo();
+  // Resize the modified and used register unit trackers. We do this once
+  // per function and then clear the register units each time we optimize a
+  // move.
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+  bool Modified = false;
+  for (auto &MBB : Fn) {
+    Modified |= MovOpt(MBB);
+  }
+  return Modified;
+}
+
+/// createRISCVMoveOptimizationPass - returns an instance of the
+/// move optimization pass.
+FunctionPass *llvm::createRISCVMoveOptimizationPass() {
+  return new RISCVMoveOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVOptimizePushPop.cpp b/llvm/lib/Target/RISCV/RISCVOptimizePushPop.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVOptimizePushPop.cpp
@@ -0,0 +1,164 @@
+//===- RISCVOptimizePushPop.cpp - RISC-V Push/Pop opt. pass --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that modifies Zcmp PUSH/POP instructions to use
+// their non-prologue/epilogue related functionality and generates the POPRET
+// instruction.
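+// When the value being returned is a constant zero, POPRETZ is generated
+// instead.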
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define RISCV_PUSH_POP_OPT_NAME "RISC-V Zce Push/Pop optimization pass"
+
+namespace {
+struct RISCVPushPopOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVPushPopOpt() : MachineFunctionPass(ID) {
+    initializeRISCVPushPopOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  const RISCVInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const RISCVSubtarget *Subtarget;
+
+  // Track which register units have been modified and used.
+  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+  bool usePopRet(MachineBasicBlock::iterator &MBBI);
+  bool adjustRetVal(MachineBasicBlock::iterator &MBBI);
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  std::map<MachineInstr *, int64_t> retValMap;
+
+  StringRef getPassName() const override { return RISCV_PUSH_POP_OPT_NAME; }
+};
+
+char RISCVPushPopOpt::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVPushPopOpt, "riscv-push-pop-opt", RISCV_PUSH_POP_OPT_NAME,
+                false, false)
+
+// Check if a POP instruction was inserted into the MBB and return an iterator
+// to it.
+static MachineBasicBlock::iterator containsPop(MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBB.end();
+       MBBI = next_nodbg(MBBI, MBB.end()))
+    if (MBBI->getOpcode() == RISCV::CM_POP)
+      return MBBI;
+
+  return MBB.end();
+}
+
+bool RISCVPushPopOpt::usePopRet(MachineBasicBlock::iterator &MBBI) {
+  MachineBasicBlock::iterator E = MBBI->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(MBBI, E);
+  // Since pseudo-instruction lowering happens later in the pipeline, this
+  // will detect all ret instructions.
+  if (NextI->getOpcode() == RISCV::PseudoRET) {
+    DebugLoc DL = NextI->getDebugLoc();
+    auto retValInfo = retValMap.find(&(*MBBI));
+    if (retValInfo == retValMap.end())
+      BuildMI(*NextI->getParent(), NextI, DL, TII->get(RISCV::CM_POPRET))
+          .add(MBBI->getOperand(0))
+          .add(MBBI->getOperand(1));
+    else if (retValInfo->second == 0)
+      BuildMI(*NextI->getParent(), NextI, DL, TII->get(RISCV::CM_POPRETZ))
+          .add(MBBI->getOperand(0))
+          .add(MBBI->getOperand(1));
+    // If the return value is not 0, POPRETZ cannot be used.
+    else
+      return false;
+    MBBI->eraseFromParent();
+    NextI->eraseFromParent();
+    return true;
+  }
+  return false;
+}
+
+// Search for the last assignment to a0 and, if possible, use the ret_val
+// slot of POP to store the return value.
+bool RISCVPushPopOpt::adjustRetVal(MachineBasicBlock::iterator &MBBI) {
+  MachineBasicBlock::reverse_iterator RE = MBBI->getParent()->rend();
+  // Track which register units have been modified and used between the POP
+  // insn and the last assignment to register a0.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+  retValMap.clear();
+  Register A0(RISCV::X10);
+  // Since the POP instruction is in the epilogue, no normal instructions
+  // follow it. Therefore search only the preceding instructions to find the
+  // return value.
+  for (MachineBasicBlock::reverse_iterator I =
+           next_nodbg(MBBI.getReverse(), RE);
+       I != RE; I = next_nodbg(I, RE)) {
+    MachineInstr &MI = *I;
+    if (auto OperandPair = TII->isLoadImmImpl(MI)) {
+      Register DestReg = OperandPair->Destination->getReg();
+      int64_t RetVal = OperandPair->Source->getImm();
+      if (DestReg == RISCV::X10) {
+        switch (RetVal) {
+        default:
+          return false;
+        case 0:
+          retValMap[&(*MBBI)] = 0;
+        }
+        MI.removeFromParent();
+        return true;
+      }
+    }
+    // Update modified / used register units.
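+    // accumulateUsedDefed tracks register units, so aliasing registers are
+    // covered as well.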
+    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+    // If a0 was modified or used, there is no possibility of using the
+    // ret_val slot of the POP instruction.
+    if (!ModifiedRegUnits.available(A0) || !UsedRegUnits.available(A0))
+      return false;
+  }
+  return false;
+}
+
+bool RISCVPushPopOpt::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  // If the Zcmp extension is not supported, abort.
+  Subtarget = &static_cast<const RISCVSubtarget &>(Fn.getSubtarget());
+  if (!Subtarget->hasStdExtZcmp()) {
+    return false;
+  }
+  TII = static_cast<const RISCVInstrInfo *>(Subtarget->getInstrInfo());
+  TRI = Subtarget->getRegisterInfo();
+  // Resize the modified and used register unit trackers. We do this once
+  // per function and then clear the register units each time we determine
+  // the correct return value for the POP.
+  ModifiedRegUnits.init(*TRI);
+  UsedRegUnits.init(*TRI);
+  bool Modified = false;
+  for (auto &MBB : Fn) {
+    auto MBBI = containsPop(MBB);
+    if (MBBI != MBB.end()) {
+      Modified |= adjustRetVal(MBBI);
+      if (MBB.isReturnBlock())
+        Modified |= usePopRet(MBBI);
+    }
+  }
+  return Modified;
+}
+
+/// createRISCVPushPopOptimizationPass - returns an instance of the
+/// Push/Pop optimization pass.
+FunctionPass *llvm::createRISCVPushPopOptimizationPass() {
+  return new RISCVPushPopOpt();
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -177,6 +177,24 @@
   let RegInfos = XLenRI;
 }

+// Saved registers s0-s7, for the cm.mva01s/cm.mvsa01 instructions in the
+// Zcmp extension.
+def SR07 : RegisterClass<"RISCV", [XLenVT], 32, (add
+    (sequence "X%u", 8, 9),
+    (sequence "X%u", 18, 23)
+  )> {
+  let RegInfos = XLenRI;
+}
+
+// Registers saveable by the PUSH instruction in the Zcmp extension.
+def PGPR : RegisterClass<"RISCV", [XLenVT], 32, (add
+    (sequence "X%u", 8, 9),
+    (sequence "X%u", 18, 25),
+    X27,
+    X1
+  )> {
+  let RegInfos = XLenRI;
+}
+
 // Floating point registers
 let RegAltNameIndices = [ABIRegAltName] in {
   def F0_H : RISCVReg16<0, "f0", ["ft0"]>, DwarfRegNum<[32]>;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -60,6 +60,9 @@
   bool HasStdExtZbr = false;
   bool HasStdExtZbs = false;
   bool HasStdExtZbt = false;
+  bool HasStdExtZca = false;
+  bool HasStdExtZcb = false;
+  bool HasStdExtZcmp = false;
   bool HasStdExtV = false;
   bool HasStdExtZve32x = false;
   bool HasStdExtZve32f = false;
@@ -166,6 +169,9 @@
   bool hasStdExtZbt() const { return HasStdExtZbt; }
   bool hasStdExtZvl() const { return ZvlLen != 0; }
   bool hasStdExtZvfh() const { return HasStdExtZvfh; }
+  bool hasStdExtZca() const { return HasStdExtZca; }
+  bool hasStdExtZcb() const { return HasStdExtZcb; }
+  bool hasStdExtZcmp() const { return HasStdExtZcmp; }
   bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
   bool hasStdExtZfh() const { return HasStdExtZfh; }
   bool hasStdExtZfinx() const { return HasStdExtZfinx; }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -53,6 +53,8 @@
   initializeRISCVSExtWRemovalPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
   initializeRISCVInsertVSETVLIPass(*PR);
+  initializeRISCVMoveOptPass(*PR);
+  initializeRISCVPushPopOptPass(*PR);
 }

 static
StringRef computeDataLayout(const Triple &TT) {
@@ -236,6 +238,10 @@
 }

 void RISCVPassConfig::addPreEmitPass2() {
+  addPass(createRISCVMoveOptimizationPass());
+  // Schedule the Push/Pop optimization before pseudo-instruction expansion,
+  // so that return instructions are still detected correctly.
+  addPass(createRISCVPushPopOptimizationPass());
   addPass(createRISCVExpandPseudoPass());
   // Schedule the expansion of AMOs at the last possible moment, avoiding the
   // possibility for other passes to break the requirements for forward
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -57,6 +57,8 @@
 ; CHECK-NEXT: Contiguously Lay Out Funclets
 ; CHECK-NEXT: StackMap Liveness Analysis
 ; CHECK-NEXT: Live DEBUG_VALUE analysis
+; CHECK-NEXT: RISC-V Zce move merging pass
+; CHECK-NEXT: RISC-V Zce Push/Pop optimization pass
 ; CHECK-NEXT: RISCV pseudo instruction expansion pass
 ; CHECK-NEXT: RISCV atomic pseudo instruction expansion pass
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -158,6 +158,8 @@
 ; CHECK-NEXT: Live DEBUG_VALUE analysis
 ; CHECK-NEXT: Machine Outliner
 ; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: RISC-V Zce move merging pass
+; CHECK-NEXT: RISC-V Zce Push/Pop optimization pass
 ; CHECK-NEXT: RISCV pseudo instruction expansion pass
 ; CHECK-NEXT: RISCV atomic pseudo instruction expansion pass
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -39,6 +39,10 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+zicbom %s -o - | FileCheck --check-prefix=RV32ZICBOM %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zicboz %s -o - | FileCheck --check-prefix=RV32ZICBOZ %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zicbop %s -o - | FileCheck --check-prefix=RV32ZICBOP %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zca %s -o - | FileCheck --check-prefix=RV32ZCA %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcb %s -o - | FileCheck --check-prefix=RV32ZCB %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp %s -o - | FileCheck --check-prefix=RV32ZCMP %s
+
 ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefix=RV64A %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefix=RV64F %s
@@ -78,6 +82,9 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+zicbom %s -o - | FileCheck --check-prefix=RV64ZICBOM %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zicboz %s -o - | FileCheck --check-prefix=RV64ZICBOZ %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zicbop %s -o - | FileCheck --check-prefix=RV64ZICBOP %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zca %s -o - | FileCheck --check-prefix=RV64ZCA %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcb %s -o - | FileCheck --check-prefix=RV64ZCB %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp %s -o - | FileCheck --check-prefix=RV64ZCMP %s

 ; RV32M: .attribute 5, "rv32i2p0_m2p0"
 ; RV32A: .attribute 5, "rv32i2p0_a2p0"
@@ -118,6 +125,9 @@
 ; RV32ZICBOM: .attribute 5, "rv32i2p0_zicbom1p0"
 ; RV32ZICBOZ: .attribute 5, "rv32i2p0_zicboz1p0"
 ; RV32ZICBOP: .attribute 5, "rv32i2p0_zicbop1p0"
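+; Zcb and Zcmp imply Zca, which is why zca0p70 also appears in their arch
+; strings below.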
+; RV32ZCA: .attribute 5, "rv32i2p0_zca0p70" +; RV32ZCB: .attribute 5, "rv32i2p0_zca0p70_zcb0p70" +; RV32ZCMP: .attribute 5, "rv32i2p0_zca0p70_zcmp0p70" ; RV64M: .attribute 5, "rv64i2p0_m2p0" ; RV64A: .attribute 5, "rv64i2p0_a2p0" @@ -158,6 +168,9 @@ ; RV64ZICBOM: .attribute 5, "rv64i2p0_zicbom1p0" ; RV64ZICBOZ: .attribute 5, "rv64i2p0_zicboz1p0" ; RV64ZICBOP: .attribute 5, "rv64i2p0_zicbop1p0" +; RV64ZCA: .attribute 5, "rv64i2p0_zca0p70" +; RV64ZCB: .attribute 5, "rv64i2p0_zca0p70_zcb0p70" +; RV64ZCMP: .attribute 5, "rv64i2p0_zca0p70_zcmp0p70" define i32 @addi(i32 %a) { %1 = add i32 %a, 1 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -9,6 +9,8 @@ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -frame-pointer=all < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I-WITH-FP +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32IZCMP ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I ; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi lp64f -verify-machineinstrs < %s \ @@ -19,6 +21,8 @@ ; RUN: | FileCheck %s -check-prefix=RV64I ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -frame-pointer=all < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I-WITH-FP +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64IZCMP @var = global [32 x i32] zeroinitializer @@ -249,6 +253,89 @@ ; RV32I-WITH-FP-NEXT: addi sp, sp, 80 ; RV32I-WITH-FP-NEXT: ret ; +; RV32IZCMP-LABEL: callee: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -80 +; RV32IZCMP-NEXT: lui a7, %hi(var) +; RV32IZCMP-NEXT: lw a0, %lo(var)(a7) +; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+4)(a7) +; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+8)(a7) +; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+12)(a7) +; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: addi a5, a7, %lo(var) +; RV32IZCMP-NEXT: lw a0, 16(a5) +; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 20(a5) +; RV32IZCMP-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw t0, 24(a5) +; RV32IZCMP-NEXT: lw t1, 28(a5) +; RV32IZCMP-NEXT: lw t2, 32(a5) +; RV32IZCMP-NEXT: lw t3, 36(a5) +; RV32IZCMP-NEXT: lw t4, 40(a5) +; RV32IZCMP-NEXT: lw t5, 44(a5) +; RV32IZCMP-NEXT: lw t6, 48(a5) +; RV32IZCMP-NEXT: lw s0, 52(a5) +; RV32IZCMP-NEXT: lw s1, 56(a5) +; RV32IZCMP-NEXT: lw s2, 60(a5) +; RV32IZCMP-NEXT: lw s3, 64(a5) +; RV32IZCMP-NEXT: lw s4, 68(a5) +; RV32IZCMP-NEXT: lw s5, 72(a5) +; RV32IZCMP-NEXT: lw s6, 76(a5) +; RV32IZCMP-NEXT: lw s7, 80(a5) +; RV32IZCMP-NEXT: lw s8, 84(a5) +; RV32IZCMP-NEXT: lw s9, 88(a5) +; RV32IZCMP-NEXT: lw s10, 92(a5) +; RV32IZCMP-NEXT: lw s11, 96(a5) +; RV32IZCMP-NEXT: lw ra, 100(a5) +; RV32IZCMP-NEXT: lw a6, 104(a5) +; RV32IZCMP-NEXT: lw a4, 108(a5) +; RV32IZCMP-NEXT: lw a0, 124(a5) +; RV32IZCMP-NEXT: lw a1, 120(a5) +; RV32IZCMP-NEXT: lw a2, 116(a5) +; RV32IZCMP-NEXT: lw a3, 112(a5) +; RV32IZCMP-NEXT: sw a0, 124(a5) +; RV32IZCMP-NEXT: sw a1, 120(a5) +; RV32IZCMP-NEXT: sw a2, 116(a5) +; RV32IZCMP-NEXT: sw a3, 112(a5) +; RV32IZCMP-NEXT: sw a4, 108(a5) +; RV32IZCMP-NEXT: sw a6, 104(a5) 
+; RV32IZCMP-NEXT: sw ra, 100(a5) +; RV32IZCMP-NEXT: sw s11, 96(a5) +; RV32IZCMP-NEXT: sw s10, 92(a5) +; RV32IZCMP-NEXT: sw s9, 88(a5) +; RV32IZCMP-NEXT: sw s8, 84(a5) +; RV32IZCMP-NEXT: sw s7, 80(a5) +; RV32IZCMP-NEXT: sw s6, 76(a5) +; RV32IZCMP-NEXT: sw s5, 72(a5) +; RV32IZCMP-NEXT: sw s4, 68(a5) +; RV32IZCMP-NEXT: sw s3, 64(a5) +; RV32IZCMP-NEXT: sw s2, 60(a5) +; RV32IZCMP-NEXT: sw s1, 56(a5) +; RV32IZCMP-NEXT: sw s0, 52(a5) +; RV32IZCMP-NEXT: sw t6, 48(a5) +; RV32IZCMP-NEXT: sw t5, 44(a5) +; RV32IZCMP-NEXT: sw t4, 40(a5) +; RV32IZCMP-NEXT: sw t3, 36(a5) +; RV32IZCMP-NEXT: sw t2, 32(a5) +; RV32IZCMP-NEXT: sw t1, 28(a5) +; RV32IZCMP-NEXT: sw t0, 24(a5) +; RV32IZCMP-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 20(a5) +; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 16(a5) +; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+12)(a7) +; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+8)(a7) +; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+4)(a7) +; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var)(a7) +; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 80 +; ; RV64I-LABEL: callee: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -160 @@ -471,6 +558,89 @@ ; RV64I-WITH-FP-NEXT: ld s11, 56(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: addi sp, sp, 160 ; RV64I-WITH-FP-NEXT: ret +; +; RV64IZCMP-LABEL: callee: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 +; RV64IZCMP-NEXT: lui a7, %hi(var) +; RV64IZCMP-NEXT: lw a0, %lo(var)(a7) +; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+4)(a7) +; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+8)(a7) +; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+12)(a7) +; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: addi a5, a7, %lo(var) +; RV64IZCMP-NEXT: lw a0, 16(a5) +; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 20(a5) +; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw t0, 24(a5) +; RV64IZCMP-NEXT: lw t1, 28(a5) +; RV64IZCMP-NEXT: lw t2, 32(a5) +; RV64IZCMP-NEXT: lw t3, 36(a5) +; RV64IZCMP-NEXT: lw t4, 40(a5) +; RV64IZCMP-NEXT: lw t5, 44(a5) +; RV64IZCMP-NEXT: lw t6, 48(a5) +; RV64IZCMP-NEXT: lw s0, 52(a5) +; RV64IZCMP-NEXT: lw s1, 56(a5) +; RV64IZCMP-NEXT: lw s2, 60(a5) +; RV64IZCMP-NEXT: lw s3, 64(a5) +; RV64IZCMP-NEXT: lw s4, 68(a5) +; RV64IZCMP-NEXT: lw s5, 72(a5) +; RV64IZCMP-NEXT: lw s6, 76(a5) +; RV64IZCMP-NEXT: lw s7, 80(a5) +; RV64IZCMP-NEXT: lw s8, 84(a5) +; RV64IZCMP-NEXT: lw s9, 88(a5) +; RV64IZCMP-NEXT: lw s10, 92(a5) +; RV64IZCMP-NEXT: lw s11, 96(a5) +; RV64IZCMP-NEXT: lw ra, 100(a5) +; RV64IZCMP-NEXT: lw a6, 104(a5) +; RV64IZCMP-NEXT: lw a4, 108(a5) +; RV64IZCMP-NEXT: lw a0, 124(a5) +; RV64IZCMP-NEXT: lw a1, 120(a5) +; RV64IZCMP-NEXT: lw a2, 116(a5) +; RV64IZCMP-NEXT: lw a3, 112(a5) +; RV64IZCMP-NEXT: sw a0, 124(a5) +; RV64IZCMP-NEXT: sw a1, 120(a5) +; RV64IZCMP-NEXT: sw a2, 116(a5) +; RV64IZCMP-NEXT: sw a3, 112(a5) +; RV64IZCMP-NEXT: sw a4, 108(a5) +; RV64IZCMP-NEXT: sw a6, 104(a5) +; RV64IZCMP-NEXT: sw ra, 100(a5) +; RV64IZCMP-NEXT: sw s11, 96(a5) +; RV64IZCMP-NEXT: sw s10, 92(a5) +; RV64IZCMP-NEXT: sw s9, 88(a5) +; RV64IZCMP-NEXT: sw s8, 84(a5) +; RV64IZCMP-NEXT: sw s7, 80(a5) +; RV64IZCMP-NEXT: sw s6, 76(a5) +; RV64IZCMP-NEXT: 
sw s5, 72(a5) +; RV64IZCMP-NEXT: sw s4, 68(a5) +; RV64IZCMP-NEXT: sw s3, 64(a5) +; RV64IZCMP-NEXT: sw s2, 60(a5) +; RV64IZCMP-NEXT: sw s1, 56(a5) +; RV64IZCMP-NEXT: sw s0, 52(a5) +; RV64IZCMP-NEXT: sw t6, 48(a5) +; RV64IZCMP-NEXT: sw t5, 44(a5) +; RV64IZCMP-NEXT: sw t4, 40(a5) +; RV64IZCMP-NEXT: sw t3, 36(a5) +; RV64IZCMP-NEXT: sw t2, 32(a5) +; RV64IZCMP-NEXT: sw t1, 28(a5) +; RV64IZCMP-NEXT: sw t0, 24(a5) +; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 20(a5) +; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 16(a5) +; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+12)(a7) +; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+8)(a7) +; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+4)(a7) +; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var)(a7) +; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 %val = load [32 x i32], [32 x i32]* @var store volatile [32 x i32] %val, [32 x i32]* @var ret void @@ -769,6 +939,124 @@ ; RV32I-WITH-FP-NEXT: addi sp, sp, 144 ; RV32I-WITH-FP-NEXT: ret ; +; RV32IZCMP-LABEL: caller: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -112 +; RV32IZCMP-NEXT: addi sp, sp, -32 +; RV32IZCMP-NEXT: lui s0, %hi(var) +; RV32IZCMP-NEXT: lw a0, %lo(var)(s0) +; RV32IZCMP-NEXT: sw a0, 88(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+4)(s0) +; RV32IZCMP-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+8)(s0) +; RV32IZCMP-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var+12)(s0) +; RV32IZCMP-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: addi s5, s0, %lo(var) +; RV32IZCMP-NEXT: lw a0, 16(s5) +; RV32IZCMP-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 20(s5) +; RV32IZCMP-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 24(s5) +; RV32IZCMP-NEXT: sw a0, 64(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 28(s5) +; RV32IZCMP-NEXT: sw a0, 60(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 32(s5) +; RV32IZCMP-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 36(s5) +; RV32IZCMP-NEXT: sw a0, 52(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 40(s5) +; RV32IZCMP-NEXT: sw a0, 48(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 44(s5) +; RV32IZCMP-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 48(s5) +; RV32IZCMP-NEXT: sw a0, 40(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 52(s5) +; RV32IZCMP-NEXT: sw a0, 36(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 56(s5) +; RV32IZCMP-NEXT: sw a0, 32(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 60(s5) +; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 64(s5) +; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 68(s5) +; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 72(s5) +; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 76(s5) +; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 80(s5) +; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 84(s5) +; RV32IZCMP-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw s3, 88(s5) +; RV32IZCMP-NEXT: lw s4, 92(s5) +; RV32IZCMP-NEXT: lw s6, 96(s5) +; RV32IZCMP-NEXT: lw s7, 100(s5) +; RV32IZCMP-NEXT: lw s8, 104(s5) +; 
RV32IZCMP-NEXT: lw s9, 108(s5) +; RV32IZCMP-NEXT: lw s10, 112(s5) +; RV32IZCMP-NEXT: lw s11, 116(s5) +; RV32IZCMP-NEXT: lw s1, 120(s5) +; RV32IZCMP-NEXT: lw s2, 124(s5) +; RV32IZCMP-NEXT: call callee@plt +; RV32IZCMP-NEXT: sw s2, 124(s5) +; RV32IZCMP-NEXT: sw s1, 120(s5) +; RV32IZCMP-NEXT: sw s11, 116(s5) +; RV32IZCMP-NEXT: sw s10, 112(s5) +; RV32IZCMP-NEXT: sw s9, 108(s5) +; RV32IZCMP-NEXT: sw s8, 104(s5) +; RV32IZCMP-NEXT: sw s7, 100(s5) +; RV32IZCMP-NEXT: sw s6, 96(s5) +; RV32IZCMP-NEXT: sw s4, 92(s5) +; RV32IZCMP-NEXT: sw s3, 88(s5) +; RV32IZCMP-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 84(s5) +; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 80(s5) +; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 76(s5) +; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 72(s5) +; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 68(s5) +; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 64(s5) +; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 60(s5) +; RV32IZCMP-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 56(s5) +; RV32IZCMP-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 52(s5) +; RV32IZCMP-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 48(s5) +; RV32IZCMP-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 44(s5) +; RV32IZCMP-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 40(s5) +; RV32IZCMP-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 36(s5) +; RV32IZCMP-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 32(s5) +; RV32IZCMP-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 28(s5) +; RV32IZCMP-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 24(s5) +; RV32IZCMP-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 20(s5) +; RV32IZCMP-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 16(s5) +; RV32IZCMP-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+12)(s0) +; RV32IZCMP-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+8)(s0) +; RV32IZCMP-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var+4)(s0) +; RV32IZCMP-NEXT: lw a0, 88(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var)(s0) +; RV32IZCMP-NEXT: addi sp, sp, 32 +; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 112 +; ; RV64I-LABEL: caller: ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -288 @@ -1057,7 +1345,124 @@ ; RV64I-WITH-FP-NEXT: ld s11, 184(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: addi sp, sp, 288 ; RV64I-WITH-FP-NEXT: ret - +; +; RV64IZCMP-LABEL: caller: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 +; RV64IZCMP-NEXT: addi sp, sp, -128 +; RV64IZCMP-NEXT: lui s0, %hi(var) +; RV64IZCMP-NEXT: lw a0, %lo(var)(s0) +; RV64IZCMP-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+4)(s0) +; RV64IZCMP-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+8)(s0) +; RV64IZCMP-NEXT: sd a0, 160(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var+12)(s0) +; RV64IZCMP-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: addi s5, s0, %lo(var) +; RV64IZCMP-NEXT: lw a0, 16(s5) +; RV64IZCMP-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw 
a0, 20(s5) +; RV64IZCMP-NEXT: sd a0, 136(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 24(s5) +; RV64IZCMP-NEXT: sd a0, 128(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 28(s5) +; RV64IZCMP-NEXT: sd a0, 120(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 32(s5) +; RV64IZCMP-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 36(s5) +; RV64IZCMP-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 40(s5) +; RV64IZCMP-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 44(s5) +; RV64IZCMP-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 48(s5) +; RV64IZCMP-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 52(s5) +; RV64IZCMP-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 56(s5) +; RV64IZCMP-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 60(s5) +; RV64IZCMP-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 64(s5) +; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 68(s5) +; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 72(s5) +; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 76(s5) +; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 80(s5) +; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 84(s5) +; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw s3, 88(s5) +; RV64IZCMP-NEXT: lw s4, 92(s5) +; RV64IZCMP-NEXT: lw s6, 96(s5) +; RV64IZCMP-NEXT: lw s7, 100(s5) +; RV64IZCMP-NEXT: lw s8, 104(s5) +; RV64IZCMP-NEXT: lw s9, 108(s5) +; RV64IZCMP-NEXT: lw s10, 112(s5) +; RV64IZCMP-NEXT: lw s11, 116(s5) +; RV64IZCMP-NEXT: lw s1, 120(s5) +; RV64IZCMP-NEXT: lw s2, 124(s5) +; RV64IZCMP-NEXT: call callee@plt +; RV64IZCMP-NEXT: sw s2, 124(s5) +; RV64IZCMP-NEXT: sw s1, 120(s5) +; RV64IZCMP-NEXT: sw s11, 116(s5) +; RV64IZCMP-NEXT: sw s10, 112(s5) +; RV64IZCMP-NEXT: sw s9, 108(s5) +; RV64IZCMP-NEXT: sw s8, 104(s5) +; RV64IZCMP-NEXT: sw s7, 100(s5) +; RV64IZCMP-NEXT: sw s6, 96(s5) +; RV64IZCMP-NEXT: sw s4, 92(s5) +; RV64IZCMP-NEXT: sw s3, 88(s5) +; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 84(s5) +; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 80(s5) +; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 76(s5) +; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 72(s5) +; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 68(s5) +; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 64(s5) +; RV64IZCMP-NEXT: ld a0, 56(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 60(s5) +; RV64IZCMP-NEXT: ld a0, 64(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 56(s5) +; RV64IZCMP-NEXT: ld a0, 72(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 52(s5) +; RV64IZCMP-NEXT: ld a0, 80(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 48(s5) +; RV64IZCMP-NEXT: ld a0, 88(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 44(s5) +; RV64IZCMP-NEXT: ld a0, 96(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 40(s5) +; RV64IZCMP-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 36(s5) +; RV64IZCMP-NEXT: ld a0, 112(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 32(s5) +; RV64IZCMP-NEXT: ld a0, 120(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 28(s5) +; RV64IZCMP-NEXT: ld a0, 
128(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 24(s5) +; RV64IZCMP-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 20(s5) +; RV64IZCMP-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 16(s5) +; RV64IZCMP-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+12)(s0) +; RV64IZCMP-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+8)(s0) +; RV64IZCMP-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var+4)(s0) +; RV64IZCMP-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var)(s0) +; RV64IZCMP-NEXT: addi sp, sp, 128 +; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 %val = load [32 x i32], [32 x i32]* @var call void @callee() store volatile [32 x i32] %val, [32 x i32]* @var diff --git a/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll b/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/cm_mvas_mvsa.ll @@ -0,0 +1,163 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=VALID,VALID32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=VALID + + +; Function Attrs: nounwind +define dso_local i32 @cmva(i32 %num, i32 %f, i32 %d, i32 %dx) local_unnamed_addr #0 { +; VALID-LABEL: cmva: +; VALID: cm.mva01s {{s[0-7]}}, {{s[0-7]}} +; VALID-NOT: cm.mva01s {{a.}}, {{a.}} +entry: + %mul = mul nsw i32 %dx, %d + %sub = sub nsw i32 %mul, %dx + %add = add nsw i32 %mul, %d + %mul2 = mul nsw i32 %sub, %dx + %add3 = add nsw i32 %add, %mul2 + %mul4 = mul nsw i32 %add3, %d + %add6 = add nsw i32 %add3, %num + %add5 = add i32 %sub, %f + %add7 = add i32 %add5, %mul4 + ret i32 %add7 +} + +declare i64 @llvm.cttz.i64(i64, i1 immarg) + +define i64 @cmvs32(i64 %a) nounwind { +; VALID32-LABEL: cmvs32: +; VALID32: cm.mvsa01 {{s[0-7]}}, {{s[0-7]}} +; VALID32-NOT: cm.mvsa01 {{a.}}, {{a.}} + %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false) + ret i64 %tmp +} + +declare void @hoge() +define void @cmvs64(i32 signext %arg, i32 signext %arg1) nounwind { +; VALID64-LABEL: cmvs: +; VALID64: cm.mvsa01 {{s[0-7]}}, {{s[0-7]}} +; VALID64-NOT: cm.mvsa01 {{a.}}, {{a.}} +bb: + %tmp = icmp eq i32 %arg, %arg1 + br i1 %tmp, label %bb6, label %bb2 + +bb2: ; preds = %bb2, %bb + %tmp3 = phi i32 [ %tmp4, %bb2 ], [ %arg, %bb ] + tail call void @hoge() + %tmp4 = add nsw i32 %tmp3, 1 + %tmp5 = icmp eq i32 %tmp4, %arg1 + br i1 %tmp5, label %bb6, label %bb2 + +bb6: ; preds = %bb2, %bb + ret void +} + +%struct.trie = type { [26 x %struct.trie*], i8 } + +@word = external global i8* + +declare i32 @trie_new(%struct.trie*) +declare i32 @trie_search(i8*, i32, %struct.trie**) +declare i64 @strnlen(i8*, i64) + +; Function Attrs: nounwind optnone +define i32 @mvas_2() { + ; VALID64-LABEL: mvas_2: + ; VALID64-NOT: cm.mva01s {{a.}}, {{s.}} + ; VALID64-NOT: cm.mva01s {{s.}}, {{a.}} +entry: + %trie = alloca %struct.trie* + %0 = bitcast %struct.trie** %trie to i8* + store %struct.trie* null, %struct.trie** %trie + %call = tail call i32 @trie_new(%struct.trie* null) + %1 = load i8*, i8** @word + %call1 = tail call i64 @strnlen(i8* %1, i64 100) + %conv = trunc i64 %call1 to i32 + %call2 = call i32 @trie_search(i8* %1, i32 %conv, %struct.trie** %trie) + ret i32 %call2 +} + +declare i32 @foo(i32, i32) + +; Function Attrs: nounwind optnone +define dso_local i32 @cm_mvas_same_src(i32 %0, i32 %1, i32 %2, i32 %3) { + ; VALID32-LABEL: 
cm_mvas_same_src: + ; VALID32: cm.mva01s s0, s0 + ; + ; VALID64-LABEL: cm_mvas_same_src: + ; VALID64: cm.mva01s s0, s0 +entry: + %4 = call i32 @foo(i32 %3, i32 %2) + %5 = add i32 %4, %2 + %6 = call i32 @foo(i32 %3, i32 %3) + %add = add i32 %5, %6 + ret i32 %add +} + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } +%struct.Node = type { i8*, i64, %struct.Node*, %struct.Node* } + +declare i8* @malloc(i64) + +declare i32 @fgetc(%struct._IO_FILE*nocapture) + +declare %struct.Node* @addWordToTree(i8*, %struct.Node*) + +; Function Attrs: nounwind optsize +define %struct.Node* @cmmv_a1_come_first(%struct._IO_FILE*nocapture %file, %struct.Node* %root) { + ; VALID32-LABEL: cmmv_a1_come_first: + ; VALID32: cm.push {ra, s0-s5}, -32 + ; VALID32-NEXT: .cfi_offset ra, -4 + ; VALID32-NEXT: .cfi_offset s0, -8 + ; VALID32-NEXT: .cfi_offset s1, -12 + ; VALID32-NEXT: .cfi_offset s2, -16 + ; VALID32-NEXT: .cfi_offset s3, -20 + ; VALID32-NEXT: .cfi_offset s4, -24 + ; VALID32-NEXT: .cfi_offset s5, -28 + ; VALID32-NEXT: cm.mvsa01 s1, s0 + + ; VALID64-LABEL: cmmv_a1_come_first: + ; VALID64: cm.push {ra, s0-s5}, -64 + ; VALID64-NEXT: .cfi_offset ra, -8 + ; VALID64-NEXT: .cfi_offset s0, -16 + ; VALID64-NEXT: .cfi_offset s1, -24 + ; VALID64-NEXT: .cfi_offset s2, -32 + ; VALID64-NEXT: .cfi_offset s3, -40 + ; VALID64-NEXT: .cfi_offset s4, -48 + ; VALID64-NEXT: .cfi_offset s5, -56 + ; VALID64-NEXT: cm.mvsa01 s2, s0 +entry: + %call = tail call dereferenceable_or_null(46) i8* @malloc(i64 46) + %arrayidx = getelementptr inbounds i8, i8* %call, i64 -1 + %call117 = tail call i32 @fgetc(%struct._IO_FILE* %file) + %sext.mask18 = and i32 %call117, 255 + %cmp.not19 = icmp eq i32 %sext.mask18, 255 + br i1 %cmp.not19, label %while.end, label %land.lhs.true.preheader + +land.lhs.true.preheader: + %arrayidx921 = getelementptr inbounds i8, i8* %call, i64 255 + store i8 0, i8* %arrayidx921 + %call1022 = tail call %struct.Node* @addWordToTree(i8* %call, %struct.Node* %root) + %call123 = tail call i32 @fgetc(%struct._IO_FILE* %file) + %sext.mask24 = and i32 %call123, 255 + %cmp.not25 = icmp eq i32 %sext.mask24, 255 + br i1 %cmp.not25, label %while.end, label %land.lhs.true.land.lhs.true_crit_edge + +land.lhs.true.land.lhs.true_crit_edge: + %call1026 = phi %struct.Node* [ %call10, %land.lhs.true.land.lhs.true_crit_edge ], [ %call1022, %land.lhs.true.preheader ] + %.pre = load i8, i8* %arrayidx + %cmp6.not = icmp eq i8 %.pre, 39 + %spec.select = select i1 %cmp6.not, i64 0, i64 255 + %arrayidx9 = getelementptr inbounds i8, i8* %call, i64 %spec.select + store i8 0, i8* %arrayidx9 + %call10 = tail call %struct.Node* @addWordToTree(i8* %call, %struct.Node* %call1026) + %call1 = tail call i32 @fgetc(%struct._IO_FILE* %file) + %sext.mask = and i32 %call1, 255 + %cmp.not = icmp eq i32 %sext.mask, 255 + br i1 %cmp.not, label %while.end, label %land.lhs.true.land.lhs.true_crit_edge + +while.end: + %root.addr.0.lcssa = phi %struct.Node* [ %root, %entry ], [ %call1022, %land.lhs.true.preheader ], [ %call10, %land.lhs.true.land.lhs.true_crit_edge ] + ret %struct.Node* %root.addr.0.lcssa +} diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll @@ -0,0 +1,1815 @@ +; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update +; NOTE: Check cm.push/cm.pop. +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32IZCMP +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmp -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64IZCMP +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64I %s + +declare void @test(i8*) + +; Function Attrs: optnone +define i32 @foo() { +; RV32IZCMP-LABEL: foo: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra}, -64 +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 528 +; RV32IZCMP-NEXT: addi sp, sp, -464 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: addi a0, sp, 12 +; RV32IZCMP-NEXT: call test@plt +; RV32IZCMP-NEXT: addi sp, sp, 464 +; RV32IZCMP-NEXT: cm.popretz {ra}, 64 +; +; RV64IZCMP-LABEL: foo: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra}, -64 +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 528 +; RV64IZCMP-NEXT: addi sp, sp, -464 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: addi a0, sp, 8 +; RV64IZCMP-NEXT: call test@plt +; RV64IZCMP-NEXT: addi sp, sp, 464 +; RV64IZCMP-NEXT: cm.popretz {ra}, 64 +; +; RV32I-LABEL: foo: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -528 +; RV32I-NEXT: .cfi_def_cfa_offset 528 +; RV32I-NEXT: sw ra, 524(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi a0, sp, 12 +; RV32I-NEXT: call test@plt +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: lw ra, 524(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 528 +; RV32I-NEXT: ret +; +; RV64I-LABEL: foo: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -528 +; RV64I-NEXT: .cfi_def_cfa_offset 528 +; RV64I-NEXT: sd ra, 520(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi a0, sp, 8 +; RV64I-NEXT: call test@plt +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: ld ra, 520(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 528 +; RV64I-NEXT: ret + %1 = alloca [512 x i8] + %2 = getelementptr [512 x i8], [512 x i8]* %1, i32 0, i32 0 + call void @test(i8* %2) + ret i32 0 +} + +define dso_local i32 @pushpopret0(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopret0: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popretz {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopret0: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popretz {ra, s0}, 16 +; +; RV32I-LABEL: pushpopret0: +; RV32I: 
# %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopret0: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 0 +} + +define dso_local i32 @pushpopret1(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopret1: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: li a0, 1 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopret1: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: li a0, 1 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV32I-LABEL: pushpopret1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded 
Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopret1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 1 +} + +define dso_local i32 @pushpopretneg1(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopretneg1: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: li a0, -1 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopretneg1: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: li a0, -1 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV32I-LABEL: pushpopretneg1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, -1 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopretneg1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; 
RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, -1 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 -1 +} + +define dso_local i32 @pushpopret2(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: pushpopret2: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: li a0, 2 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV64IZCMP-LABEL: pushpopret2: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: li a0, 2 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.popret {ra, s0}, 16 +; +; RV32I-LABEL: pushpopret2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: li a0, 2 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: pushpopret2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: li a0, 2 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +entry: + %0 = alloca i8, i32 %size, align 16 + call void @callee_void(i8* nonnull %0) + ret i32 2 +} + +define dso_local i32 @tailcall(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: tailcall: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: 
.cfi_offset s0, -8 +; RV32IZCMP-NEXT: addi s0, sp, 16 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub a0, sp, a0 +; RV32IZCMP-NEXT: mv sp, a0 +; RV32IZCMP-NEXT: addi sp, s0, -16 +; RV32IZCMP-NEXT: cm.pop {ra, s0}, 16 +; RV32IZCMP-NEXT: tail callee@plt +; +; RV64IZCMP-LABEL: tailcall: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: cm.push {ra, s0}, -16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: addi s0, sp, 16 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub a0, sp, a0 +; RV64IZCMP-NEXT: mv sp, a0 +; RV64IZCMP-NEXT: addi sp, s0, -16 +; RV64IZCMP-NEXT: cm.pop {ra, s0}, 16 +; RV64IZCMP-NEXT: tail callee@plt +; +; RV32I-LABEL: tailcall: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 16 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: mv sp, a0 +; RV32I-NEXT: addi sp, s0, -16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail callee@plt +; +; RV64I-LABEL: tailcall: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 16 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: mv sp, a0 +; RV64I-NEXT: addi sp, s0, -16 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: tail callee@plt +entry: + %0 = alloca i8, i32 %size, align 16 + %1 = tail call i32 @callee(i8* nonnull %0) + ret i32 %1 +} + +@var = global [5 x i32] zeroinitializer +define dso_local i32 @nocompress(i32 signext %size) local_unnamed_addr #0 { +; RV32IZCMP-LABEL: nocompress: +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 48 +; RV32IZCMP-NEXT: cm.push {ra, s0-s8}, -48 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: .cfi_offset s0, -8 +; RV32IZCMP-NEXT: .cfi_offset s1, -12 +; RV32IZCMP-NEXT: .cfi_offset s2, -16 +; RV32IZCMP-NEXT: .cfi_offset s3, -20 +; RV32IZCMP-NEXT: .cfi_offset s4, -24 +; RV32IZCMP-NEXT: .cfi_offset s5, -28 +; RV32IZCMP-NEXT: .cfi_offset s6, -32 +; RV32IZCMP-NEXT: .cfi_offset s7, -36 +; RV32IZCMP-NEXT: .cfi_offset s8, -40 +; RV32IZCMP-NEXT: addi s0, sp, 48 +; RV32IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV32IZCMP-NEXT: addi a0, a0, 15 +; RV32IZCMP-NEXT: andi a0, a0, -16 +; RV32IZCMP-NEXT: sub s1, sp, a0 +; RV32IZCMP-NEXT: mv sp, s1 +; RV32IZCMP-NEXT: lui s2, %hi(var) +; RV32IZCMP-NEXT: lw s3, %lo(var)(s2) +; RV32IZCMP-NEXT: lw s4, %lo(var+4)(s2) +; RV32IZCMP-NEXT: lw s5, %lo(var+8)(s2) +; RV32IZCMP-NEXT: lw s6, %lo(var+12)(s2) +; RV32IZCMP-NEXT: 
addi s7, s2, %lo(var) +; RV32IZCMP-NEXT: lw s8, 16(s7) +; RV32IZCMP-NEXT: mv a0, s1 +; RV32IZCMP-NEXT: call callee_void@plt +; RV32IZCMP-NEXT: sw s8, 16(s7) +; RV32IZCMP-NEXT: sw s6, %lo(var+12)(s2) +; RV32IZCMP-NEXT: sw s5, %lo(var+8)(s2) +; RV32IZCMP-NEXT: sw s4, %lo(var+4)(s2) +; RV32IZCMP-NEXT: sw s3, %lo(var)(s2) +; RV32IZCMP-NEXT: mv a0, s1 +; RV32IZCMP-NEXT: addi sp, s0, -48 +; RV32IZCMP-NEXT: cm.pop {ra, s0-s8}, 48 +; RV32IZCMP-NEXT: tail callee@plt +; +; RV64IZCMP-LABEL: nocompress: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 80 +; RV64IZCMP-NEXT: cm.push {ra, s0-s8}, -80 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: .cfi_offset s0, -16 +; RV64IZCMP-NEXT: .cfi_offset s1, -24 +; RV64IZCMP-NEXT: .cfi_offset s2, -32 +; RV64IZCMP-NEXT: .cfi_offset s3, -40 +; RV64IZCMP-NEXT: .cfi_offset s4, -48 +; RV64IZCMP-NEXT: .cfi_offset s5, -56 +; RV64IZCMP-NEXT: .cfi_offset s6, -64 +; RV64IZCMP-NEXT: .cfi_offset s7, -72 +; RV64IZCMP-NEXT: .cfi_offset s8, -80 +; RV64IZCMP-NEXT: addi s0, sp, 80 +; RV64IZCMP-NEXT: .cfi_def_cfa s0, 0 +; RV64IZCMP-NEXT: slli a0, a0, 32 +; RV64IZCMP-NEXT: srli a0, a0, 32 +; RV64IZCMP-NEXT: addi a0, a0, 15 +; RV64IZCMP-NEXT: andi a0, a0, -16 +; RV64IZCMP-NEXT: sub s1, sp, a0 +; RV64IZCMP-NEXT: mv sp, s1 +; RV64IZCMP-NEXT: lui s2, %hi(var) +; RV64IZCMP-NEXT: lw s3, %lo(var)(s2) +; RV64IZCMP-NEXT: lw s4, %lo(var+4)(s2) +; RV64IZCMP-NEXT: lw s5, %lo(var+8)(s2) +; RV64IZCMP-NEXT: lw s6, %lo(var+12)(s2) +; RV64IZCMP-NEXT: addi s7, s2, %lo(var) +; RV64IZCMP-NEXT: lw s8, 16(s7) +; RV64IZCMP-NEXT: mv a0, s1 +; RV64IZCMP-NEXT: call callee_void@plt +; RV64IZCMP-NEXT: sw s8, 16(s7) +; RV64IZCMP-NEXT: sw s6, %lo(var+12)(s2) +; RV64IZCMP-NEXT: sw s5, %lo(var+8)(s2) +; RV64IZCMP-NEXT: sw s4, %lo(var+4)(s2) +; RV64IZCMP-NEXT: sw s3, %lo(var)(s2) +; RV64IZCMP-NEXT: mv a0, s1 +; RV64IZCMP-NEXT: addi sp, s0, -80 +; RV64IZCMP-NEXT: cm.pop {ra, s0-s8}, 80 +; RV64IZCMP-NEXT: tail callee@plt +; +; RV32I-LABEL: nocompress: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: .cfi_def_cfa_offset 48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: .cfi_offset s4, -24 +; RV32I-NEXT: .cfi_offset s5, -28 +; RV32I-NEXT: .cfi_offset s6, -32 +; RV32I-NEXT: .cfi_offset s7, -36 +; RV32I-NEXT: .cfi_offset s8, -40 +; RV32I-NEXT: addi s0, sp, 48 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub s1, sp, a0 +; RV32I-NEXT: mv sp, s1 +; RV32I-NEXT: lui s2, %hi(var) +; RV32I-NEXT: lw s3, %lo(var)(s2) +; RV32I-NEXT: lw s4, %lo(var+4)(s2) +; RV32I-NEXT: lw s5, %lo(var+8)(s2) +; RV32I-NEXT: lw s6, %lo(var+12)(s2) +; RV32I-NEXT: addi s7, s2, %lo(var) +; RV32I-NEXT: lw s8, 16(s7) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call callee_void@plt +; RV32I-NEXT: sw s8, 16(s7) +; RV32I-NEXT: sw s6, %lo(var+12)(s2) +; RV32I-NEXT: sw s5, %lo(var+8)(s2) +; RV32I-NEXT: 
sw s4, %lo(var+4)(s2) +; RV32I-NEXT: sw s3, %lo(var)(s2) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: addi sp, s0, -48 +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: tail callee@plt +; +; RV64I-LABEL: nocompress: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -80 +; RV64I-NEXT: .cfi_def_cfa_offset 80 +; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: .cfi_offset s4, -48 +; RV64I-NEXT: .cfi_offset s5, -56 +; RV64I-NEXT: .cfi_offset s6, -64 +; RV64I-NEXT: .cfi_offset s7, -72 +; RV64I-NEXT: .cfi_offset s8, -80 +; RV64I-NEXT: addi s0, sp, 80 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub s1, sp, a0 +; RV64I-NEXT: mv sp, s1 +; RV64I-NEXT: lui s2, %hi(var) +; RV64I-NEXT: lw s3, %lo(var)(s2) +; RV64I-NEXT: lw s4, %lo(var+4)(s2) +; RV64I-NEXT: lw s5, %lo(var+8)(s2) +; RV64I-NEXT: lw s6, %lo(var+12)(s2) +; RV64I-NEXT: addi s7, s2, %lo(var) +; RV64I-NEXT: lw s8, 16(s7) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call callee_void@plt +; RV64I-NEXT: sw s8, 16(s7) +; RV64I-NEXT: sw s6, %lo(var+12)(s2) +; RV64I-NEXT: sw s5, %lo(var+8)(s2) +; RV64I-NEXT: sw s4, %lo(var+4)(s2) +; RV64I-NEXT: sw s3, %lo(var)(s2) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: addi sp, s0, -80 +; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 80 +; RV64I-NEXT: tail callee@plt +entry: + %0 = alloca i8, i32 %size, align 16 + %val = load [5 x i32], [5 x i32]* @var + call void @callee_void(i8* nonnull %0) + store volatile [5 x i32] %val, [5 x i32]* @var + %1 = tail call i32 @callee(i8* nonnull %0) + ret i32 %1 +} + +declare void @callee_void(i8*) +declare i32 @callee(i8*) + +declare i32 @foo_test_irq(...) 
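As a schematic summary of the Zcmp frame lowering these functions pin down (all mnemonics, register lists, and adjustment values below are taken from the pushpopret0 and foo checks above, not new material), the base-ISA prologue/epilogue

    addi sp, sp, -16
    sw   ra, 12(sp)               # save return address
    sw   s0, 8(sp)                # save frame pointer
    ...
    li   a0, 0                    # zero return value
    lw   ra, 12(sp)
    lw   s0, 8(sp)
    addi sp, sp, 16
    ret

becomes a single push plus one fused pop-and-return:

    cm.push    {ra, s0}, -16      # save ra/s0, allocate 16 bytes
    ...
    cm.popretz {ra, s0}, 16       # restore ra/s0, free the frame, a0 = 0, return

cm.popret is the variant without the implicit zeroing of a0. The foo checks additionally show cm.push folding part of a larger frame into its own stack adjustment (64 of the 528 bytes, with the remaining 464 handled by addi), while tailcall and nocompress end in a plain cm.pop, since the tail call replaces the fused return.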
+@var_test_irq = global [32 x i32] zeroinitializer + +define void @foo_with_irq() nounwind "interrupt"="user" { +; RV32IZCMP-LABEL: foo_with_irq: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra}, -64 +; RV32IZCMP-NEXT: sw t0, 56(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t1, 52(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t2, 48(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a3, 32(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a5, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a6, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a7, 16(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t3, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t5, 4(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t6, 0(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: call foo_test_irq@plt +; RV32IZCMP-NEXT: lw t6, 0(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t3, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a7, 16(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a6, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a5, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a4, 28(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a3, 32(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a2, 36(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a1, 40(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t2, 48(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t1, 52(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t0, 56(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: cm.pop {ra}, 64 +; RV32IZCMP-NEXT: uret +; +; RV64IZCMP-LABEL: foo_with_irq: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra}, -64 +; RV64IZCMP-NEXT: addi sp, sp, -64 +; RV64IZCMP-NEXT: sd t0, 116(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t1, 108(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t2, 100(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a0, 92(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a1, 84(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a2, 76(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a3, 68(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a4, 60(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a5, 52(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a6, 44(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a7, 36(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t3, 28(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t4, 20(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t5, 12(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t6, 4(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: call foo_test_irq@plt +; RV64IZCMP-NEXT: ld t6, 4(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t5, 12(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t4, 20(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t3, 28(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a7, 36(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a6, 44(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a5, 52(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a4, 60(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a3, 68(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a2, 76(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a1, 
84(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a0, 92(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t2, 100(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t1, 108(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t0, 116(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: addi sp, sp, 64 +; RV64IZCMP-NEXT: cm.pop {ra}, 64 +; RV64IZCMP-NEXT: uret +; +; RV32I-LABEL: foo_with_irq: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t1, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t2, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a3, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a5, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a6, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a7, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t5, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t6, 0(sp) # 4-byte Folded Spill +; RV32I-NEXT: call foo_test_irq@plt +; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t1, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t2, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a2, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a4, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a5, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a7, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t6, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: uret +; +; RV64I-LABEL: foo_with_irq: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -128 +; RV64I-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t0, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t1, 104(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t2, 96(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 80(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a2, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a3, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a4, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a5, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a6, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a7, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t3, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t4, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t5, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t6, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: call foo_test_irq@plt +; RV64I-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t0, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t1, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t2, 96(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a0, 88(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a2, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a3, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a4, 56(sp) # 
8-byte Folded Reload +; RV64I-NEXT: ld a5, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a6, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a7, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t3, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t4, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t5, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t6, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 128 +; RV64I-NEXT: uret + %call = call i32 bitcast (i32 (...)* @foo_test_irq to i32 ()*)() + ret void +} + +define void @foo_no_irq() nounwind{ +; RV32IZCMP-LABEL: foo_no_irq: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra}, -16 +; RV32IZCMP-NEXT: call foo_test_irq@plt +; RV32IZCMP-NEXT: cm.popret {ra}, 16 +; +; RV64IZCMP-LABEL: foo_no_irq: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra}, -16 +; RV64IZCMP-NEXT: call foo_test_irq@plt +; RV64IZCMP-NEXT: cm.popret {ra}, 16 +; +; RV32I-LABEL: foo_no_irq: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call foo_test_irq@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: foo_no_irq: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call foo_test_irq@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %call = call i32 bitcast (i32 (...)* @foo_test_irq to i32 ()*)() + ret void +} + +define void @callee_with_irq() nounwind "interrupt"="user" { +; RV32IZCMP-LABEL: callee_with_irq: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -112 +; RV32IZCMP-NEXT: addi sp, sp, -32 +; RV32IZCMP-NEXT: sw t0, 88(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t1, 84(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t2, 80(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a1, 72(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a2, 68(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a3, 64(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a4, 60(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a5, 56(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a6, 52(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw a7, 48(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t3, 44(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t4, 40(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t5, 36(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: sw t6, 32(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lui a7, %hi(var_test_irq) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: addi a5, a7, %lo(var_test_irq) +; RV32IZCMP-NEXT: lw a0, 16(a5) +; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 20(a5) +; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw t0, 24(a5) +; RV32IZCMP-NEXT: lw t1, 28(a5) +; RV32IZCMP-NEXT: lw t2, 32(a5) +; RV32IZCMP-NEXT: lw t3, 36(a5) +; RV32IZCMP-NEXT: lw t4, 40(a5) +; RV32IZCMP-NEXT: lw t5, 44(a5) +; RV32IZCMP-NEXT: lw t6, 48(a5) +; RV32IZCMP-NEXT: lw s0, 52(a5) +; RV32IZCMP-NEXT: lw s1, 56(a5) +; 
RV32IZCMP-NEXT: lw s2, 60(a5) +; RV32IZCMP-NEXT: lw s3, 64(a5) +; RV32IZCMP-NEXT: lw s4, 68(a5) +; RV32IZCMP-NEXT: lw s5, 72(a5) +; RV32IZCMP-NEXT: lw s6, 76(a5) +; RV32IZCMP-NEXT: lw s7, 80(a5) +; RV32IZCMP-NEXT: lw s8, 84(a5) +; RV32IZCMP-NEXT: lw s9, 88(a5) +; RV32IZCMP-NEXT: lw s10, 92(a5) +; RV32IZCMP-NEXT: lw s11, 96(a5) +; RV32IZCMP-NEXT: lw ra, 100(a5) +; RV32IZCMP-NEXT: lw a6, 104(a5) +; RV32IZCMP-NEXT: lw a4, 108(a5) +; RV32IZCMP-NEXT: lw a0, 124(a5) +; RV32IZCMP-NEXT: lw a1, 120(a5) +; RV32IZCMP-NEXT: lw a2, 116(a5) +; RV32IZCMP-NEXT: lw a3, 112(a5) +; RV32IZCMP-NEXT: sw a0, 124(a5) +; RV32IZCMP-NEXT: sw a1, 120(a5) +; RV32IZCMP-NEXT: sw a2, 116(a5) +; RV32IZCMP-NEXT: sw a3, 112(a5) +; RV32IZCMP-NEXT: sw a4, 108(a5) +; RV32IZCMP-NEXT: sw a6, 104(a5) +; RV32IZCMP-NEXT: sw ra, 100(a5) +; RV32IZCMP-NEXT: sw s11, 96(a5) +; RV32IZCMP-NEXT: sw s10, 92(a5) +; RV32IZCMP-NEXT: sw s9, 88(a5) +; RV32IZCMP-NEXT: sw s8, 84(a5) +; RV32IZCMP-NEXT: sw s7, 80(a5) +; RV32IZCMP-NEXT: sw s6, 76(a5) +; RV32IZCMP-NEXT: sw s5, 72(a5) +; RV32IZCMP-NEXT: sw s4, 68(a5) +; RV32IZCMP-NEXT: sw s3, 64(a5) +; RV32IZCMP-NEXT: sw s2, 60(a5) +; RV32IZCMP-NEXT: sw s1, 56(a5) +; RV32IZCMP-NEXT: sw s0, 52(a5) +; RV32IZCMP-NEXT: sw t6, 48(a5) +; RV32IZCMP-NEXT: sw t5, 44(a5) +; RV32IZCMP-NEXT: sw t4, 40(a5) +; RV32IZCMP-NEXT: sw t3, 36(a5) +; RV32IZCMP-NEXT: sw t2, 32(a5) +; RV32IZCMP-NEXT: sw t1, 28(a5) +; RV32IZCMP-NEXT: sw t0, 24(a5) +; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 20(a5) +; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 16(a5) +; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV32IZCMP-NEXT: lw t6, 32(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t5, 36(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t4, 40(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t3, 44(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a7, 48(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a6, 52(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a5, 56(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a4, 60(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a3, 64(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a2, 68(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a1, 72(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t2, 80(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t1, 84(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: lw t0, 88(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: addi sp, sp, 32 +; RV32IZCMP-NEXT: cm.pop {ra, s0-s11}, 112 +; RV32IZCMP-NEXT: uret +; +; RV64IZCMP-LABEL: callee_with_irq: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 +; RV64IZCMP-NEXT: addi sp, sp, -112 +; RV64IZCMP-NEXT: sd t0, 212(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t1, 204(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t2, 196(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a0, 188(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a1, 180(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a2, 172(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a3, 164(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a4, 156(sp) # 
8-byte Folded Spill +; RV64IZCMP-NEXT: sd a5, 148(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a6, 140(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd a7, 132(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t3, 124(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t4, 116(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t5, 108(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: sd t6, 100(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lui a7, %hi(var_test_irq) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: addi a5, a7, %lo(var_test_irq) +; RV64IZCMP-NEXT: lw a0, 16(a5) +; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 20(a5) +; RV64IZCMP-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw t0, 24(a5) +; RV64IZCMP-NEXT: lw t1, 28(a5) +; RV64IZCMP-NEXT: lw t2, 32(a5) +; RV64IZCMP-NEXT: lw t3, 36(a5) +; RV64IZCMP-NEXT: lw t4, 40(a5) +; RV64IZCMP-NEXT: lw t5, 44(a5) +; RV64IZCMP-NEXT: lw t6, 48(a5) +; RV64IZCMP-NEXT: lw s0, 52(a5) +; RV64IZCMP-NEXT: lw s1, 56(a5) +; RV64IZCMP-NEXT: lw s2, 60(a5) +; RV64IZCMP-NEXT: lw s3, 64(a5) +; RV64IZCMP-NEXT: lw s4, 68(a5) +; RV64IZCMP-NEXT: lw s5, 72(a5) +; RV64IZCMP-NEXT: lw s6, 76(a5) +; RV64IZCMP-NEXT: lw s7, 80(a5) +; RV64IZCMP-NEXT: lw s8, 84(a5) +; RV64IZCMP-NEXT: lw s9, 88(a5) +; RV64IZCMP-NEXT: lw s10, 92(a5) +; RV64IZCMP-NEXT: lw s11, 96(a5) +; RV64IZCMP-NEXT: lw ra, 100(a5) +; RV64IZCMP-NEXT: lw a6, 104(a5) +; RV64IZCMP-NEXT: lw a4, 108(a5) +; RV64IZCMP-NEXT: lw a0, 124(a5) +; RV64IZCMP-NEXT: lw a1, 120(a5) +; RV64IZCMP-NEXT: lw a2, 116(a5) +; RV64IZCMP-NEXT: lw a3, 112(a5) +; RV64IZCMP-NEXT: sw a0, 124(a5) +; RV64IZCMP-NEXT: sw a1, 120(a5) +; RV64IZCMP-NEXT: sw a2, 116(a5) +; RV64IZCMP-NEXT: sw a3, 112(a5) +; RV64IZCMP-NEXT: sw a4, 108(a5) +; RV64IZCMP-NEXT: sw a6, 104(a5) +; RV64IZCMP-NEXT: sw ra, 100(a5) +; RV64IZCMP-NEXT: sw s11, 96(a5) +; RV64IZCMP-NEXT: sw s10, 92(a5) +; RV64IZCMP-NEXT: sw s9, 88(a5) +; RV64IZCMP-NEXT: sw s8, 84(a5) +; RV64IZCMP-NEXT: sw s7, 80(a5) +; RV64IZCMP-NEXT: sw s6, 76(a5) +; RV64IZCMP-NEXT: sw s5, 72(a5) +; RV64IZCMP-NEXT: sw s4, 68(a5) +; RV64IZCMP-NEXT: sw s3, 64(a5) +; RV64IZCMP-NEXT: sw s2, 60(a5) +; RV64IZCMP-NEXT: sw s1, 56(a5) +; RV64IZCMP-NEXT: sw s0, 52(a5) +; RV64IZCMP-NEXT: sw t6, 48(a5) +; RV64IZCMP-NEXT: sw t5, 44(a5) +; RV64IZCMP-NEXT: sw t4, 40(a5) +; RV64IZCMP-NEXT: sw t3, 36(a5) +; RV64IZCMP-NEXT: sw t2, 32(a5) +; RV64IZCMP-NEXT: sw t1, 28(a5) +; RV64IZCMP-NEXT: sw t0, 24(a5) +; RV64IZCMP-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 20(a5) +; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 16(a5) +; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV64IZCMP-NEXT: ld t6, 100(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t5, 108(sp) # 8-byte Folded Reload +; 
RV64IZCMP-NEXT: ld t4, 116(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t3, 124(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a7, 132(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a6, 140(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a5, 148(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a4, 156(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a3, 164(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a2, 172(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a1, 180(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld a0, 188(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t2, 196(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t1, 204(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: ld t0, 212(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: addi sp, sp, 112 +; RV64IZCMP-NEXT: cm.pop {ra, s0-s11}, 160 +; RV64IZCMP-NEXT: uret +; +; RV32I-LABEL: callee_with_irq: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -144 +; RV32I-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t0, 136(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t1, 132(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t2, 128(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 124(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 120(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a0, 116(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 112(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a2, 108(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a3, 104(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a4, 100(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a5, 96(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a6, 92(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a7, 88(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 84(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 80(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t3, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t4, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t5, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t6, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: lui a7, %hi(var_test_irq) +; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV32I-NEXT: lw a0, 16(a5) +; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 20(a5) +; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw t0, 24(a5) +; RV32I-NEXT: lw t1, 28(a5) +; RV32I-NEXT: lw t2, 32(a5) +; RV32I-NEXT: lw t3, 36(a5) +; RV32I-NEXT: lw t4, 40(a5) +; RV32I-NEXT: lw t5, 44(a5) +; RV32I-NEXT: lw t6, 48(a5) +; RV32I-NEXT: lw s0, 52(a5) +; RV32I-NEXT: lw s1, 56(a5) +; RV32I-NEXT: lw s2, 60(a5) +; RV32I-NEXT: lw s3, 64(a5) +; RV32I-NEXT: lw s4, 68(a5) +; RV32I-NEXT: lw s5, 72(a5) +; RV32I-NEXT: lw s6, 76(a5) +; RV32I-NEXT: lw s7, 80(a5) +; RV32I-NEXT: lw s8, 84(a5) +; RV32I-NEXT: lw s9, 88(a5) +; RV32I-NEXT: lw s10, 92(a5) +; RV32I-NEXT: lw s11, 
96(a5) +; RV32I-NEXT: lw ra, 100(a5) +; RV32I-NEXT: lw a6, 104(a5) +; RV32I-NEXT: lw a4, 108(a5) +; RV32I-NEXT: lw a0, 124(a5) +; RV32I-NEXT: lw a1, 120(a5) +; RV32I-NEXT: lw a2, 116(a5) +; RV32I-NEXT: lw a3, 112(a5) +; RV32I-NEXT: sw a0, 124(a5) +; RV32I-NEXT: sw a1, 120(a5) +; RV32I-NEXT: sw a2, 116(a5) +; RV32I-NEXT: sw a3, 112(a5) +; RV32I-NEXT: sw a4, 108(a5) +; RV32I-NEXT: sw a6, 104(a5) +; RV32I-NEXT: sw ra, 100(a5) +; RV32I-NEXT: sw s11, 96(a5) +; RV32I-NEXT: sw s10, 92(a5) +; RV32I-NEXT: sw s9, 88(a5) +; RV32I-NEXT: sw s8, 84(a5) +; RV32I-NEXT: sw s7, 80(a5) +; RV32I-NEXT: sw s6, 76(a5) +; RV32I-NEXT: sw s5, 72(a5) +; RV32I-NEXT: sw s4, 68(a5) +; RV32I-NEXT: sw s3, 64(a5) +; RV32I-NEXT: sw s2, 60(a5) +; RV32I-NEXT: sw s1, 56(a5) +; RV32I-NEXT: sw s0, 52(a5) +; RV32I-NEXT: sw t6, 48(a5) +; RV32I-NEXT: sw t5, 44(a5) +; RV32I-NEXT: sw t4, 40(a5) +; RV32I-NEXT: sw t3, 36(a5) +; RV32I-NEXT: sw t2, 32(a5) +; RV32I-NEXT: sw t1, 28(a5) +; RV32I-NEXT: sw t0, 24(a5) +; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 20(a5) +; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 16(a5) +; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t0, 136(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t1, 132(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t2, 128(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 124(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 120(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a0, 116(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a1, 112(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a2, 108(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a3, 104(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a4, 100(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a5, 96(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a6, 92(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw a7, 88(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 84(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 80(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t3, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t4, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t5, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw t6, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 144 +; RV32I-NEXT: uret +; +; RV64I-LABEL: callee_with_irq: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -272 +; RV64I-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t0, 256(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t1, 248(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t2, 240(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 232(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 224(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a0, 216(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 208(sp) # 8-byte Folded 
Spill +; RV64I-NEXT: sd a2, 200(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a3, 192(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a4, 184(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a5, 176(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a6, 168(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a7, 160(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 152(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 144(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 136(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 128(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 104(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s9, 96(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s10, 88(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s11, 80(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t3, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t4, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t5, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd t6, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: lui a7, %hi(var_test_irq) +; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV64I-NEXT: lw a0, 16(a5) +; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 20(a5) +; RV64I-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw t0, 24(a5) +; RV64I-NEXT: lw t1, 28(a5) +; RV64I-NEXT: lw t2, 32(a5) +; RV64I-NEXT: lw t3, 36(a5) +; RV64I-NEXT: lw t4, 40(a5) +; RV64I-NEXT: lw t5, 44(a5) +; RV64I-NEXT: lw t6, 48(a5) +; RV64I-NEXT: lw s0, 52(a5) +; RV64I-NEXT: lw s1, 56(a5) +; RV64I-NEXT: lw s2, 60(a5) +; RV64I-NEXT: lw s3, 64(a5) +; RV64I-NEXT: lw s4, 68(a5) +; RV64I-NEXT: lw s5, 72(a5) +; RV64I-NEXT: lw s6, 76(a5) +; RV64I-NEXT: lw s7, 80(a5) +; RV64I-NEXT: lw s8, 84(a5) +; RV64I-NEXT: lw s9, 88(a5) +; RV64I-NEXT: lw s10, 92(a5) +; RV64I-NEXT: lw s11, 96(a5) +; RV64I-NEXT: lw ra, 100(a5) +; RV64I-NEXT: lw a6, 104(a5) +; RV64I-NEXT: lw a4, 108(a5) +; RV64I-NEXT: lw a0, 124(a5) +; RV64I-NEXT: lw a1, 120(a5) +; RV64I-NEXT: lw a2, 116(a5) +; RV64I-NEXT: lw a3, 112(a5) +; RV64I-NEXT: sw a0, 124(a5) +; RV64I-NEXT: sw a1, 120(a5) +; RV64I-NEXT: sw a2, 116(a5) +; RV64I-NEXT: sw a3, 112(a5) +; RV64I-NEXT: sw a4, 108(a5) +; RV64I-NEXT: sw a6, 104(a5) +; RV64I-NEXT: sw ra, 100(a5) +; RV64I-NEXT: sw s11, 96(a5) +; RV64I-NEXT: sw s10, 92(a5) +; RV64I-NEXT: sw s9, 88(a5) +; RV64I-NEXT: sw s8, 84(a5) +; RV64I-NEXT: sw s7, 80(a5) +; RV64I-NEXT: sw s6, 76(a5) +; RV64I-NEXT: sw s5, 72(a5) +; RV64I-NEXT: sw s4, 68(a5) +; RV64I-NEXT: sw s3, 64(a5) +; RV64I-NEXT: sw s2, 60(a5) +; RV64I-NEXT: sw s1, 56(a5) +; RV64I-NEXT: sw s0, 52(a5) +; RV64I-NEXT: sw t6, 48(a5) +; RV64I-NEXT: sw t5, 44(a5) +; RV64I-NEXT: sw t4, 40(a5) +; RV64I-NEXT: sw t3, 36(a5) +; RV64I-NEXT: sw t2, 32(a5) +; RV64I-NEXT: sw t1, 28(a5) +; RV64I-NEXT: sw t0, 24(a5) +; RV64I-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 20(a5) +; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 16(a5) +; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 
%lo(var_test_irq+8)(a7) +; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t0, 256(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t1, 248(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t2, 240(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 232(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 224(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a2, 200(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a3, 192(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a4, 184(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a5, 176(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a6, 168(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld a7, 160(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 152(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 144(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 136(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 128(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s9, 96(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s10, 88(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s11, 80(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t3, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t4, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t5, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld t6, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 272 +; RV64I-NEXT: uret + %val = load [32 x i32], [32 x i32]* @var_test_irq + store volatile [32 x i32] %val, [32 x i32]* @var_test_irq + ret void +} + +define void @callee_no_irq() nounwind{ +; RV32IZCMP-LABEL: callee_no_irq: +; RV32IZCMP: # %bb.0: +; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -80 +; RV32IZCMP-NEXT: lui a7, %hi(var_test_irq) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: addi a5, a7, %lo(var_test_irq) +; RV32IZCMP-NEXT: lw a0, 16(a5) +; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw a0, 20(a5) +; RV32IZCMP-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IZCMP-NEXT: lw t0, 24(a5) +; RV32IZCMP-NEXT: lw t1, 28(a5) +; RV32IZCMP-NEXT: lw t2, 32(a5) +; RV32IZCMP-NEXT: lw t3, 36(a5) +; RV32IZCMP-NEXT: lw t4, 40(a5) +; RV32IZCMP-NEXT: lw t5, 44(a5) +; RV32IZCMP-NEXT: lw t6, 48(a5) +; RV32IZCMP-NEXT: lw s0, 52(a5) +; RV32IZCMP-NEXT: lw s1, 56(a5) +; RV32IZCMP-NEXT: lw s2, 60(a5) +; RV32IZCMP-NEXT: lw s3, 64(a5) +; RV32IZCMP-NEXT: lw s4, 68(a5) +; RV32IZCMP-NEXT: lw s5, 72(a5) +; RV32IZCMP-NEXT: lw s6, 76(a5) +; RV32IZCMP-NEXT: lw s7, 80(a5) +; RV32IZCMP-NEXT: lw s8, 84(a5) +; RV32IZCMP-NEXT: lw s9, 88(a5) +; RV32IZCMP-NEXT: lw s10, 92(a5) +; RV32IZCMP-NEXT: lw s11, 96(a5) +; RV32IZCMP-NEXT: lw ra, 100(a5) +; RV32IZCMP-NEXT: lw a6, 104(a5) +; RV32IZCMP-NEXT: lw a4, 108(a5) +; RV32IZCMP-NEXT: lw a0, 124(a5) +; RV32IZCMP-NEXT: lw a1, 120(a5) +; RV32IZCMP-NEXT: lw a2, 116(a5) +; 
RV32IZCMP-NEXT: lw a3, 112(a5) +; RV32IZCMP-NEXT: sw a0, 124(a5) +; RV32IZCMP-NEXT: sw a1, 120(a5) +; RV32IZCMP-NEXT: sw a2, 116(a5) +; RV32IZCMP-NEXT: sw a3, 112(a5) +; RV32IZCMP-NEXT: sw a4, 108(a5) +; RV32IZCMP-NEXT: sw a6, 104(a5) +; RV32IZCMP-NEXT: sw ra, 100(a5) +; RV32IZCMP-NEXT: sw s11, 96(a5) +; RV32IZCMP-NEXT: sw s10, 92(a5) +; RV32IZCMP-NEXT: sw s9, 88(a5) +; RV32IZCMP-NEXT: sw s8, 84(a5) +; RV32IZCMP-NEXT: sw s7, 80(a5) +; RV32IZCMP-NEXT: sw s6, 76(a5) +; RV32IZCMP-NEXT: sw s5, 72(a5) +; RV32IZCMP-NEXT: sw s4, 68(a5) +; RV32IZCMP-NEXT: sw s3, 64(a5) +; RV32IZCMP-NEXT: sw s2, 60(a5) +; RV32IZCMP-NEXT: sw s1, 56(a5) +; RV32IZCMP-NEXT: sw s0, 52(a5) +; RV32IZCMP-NEXT: sw t6, 48(a5) +; RV32IZCMP-NEXT: sw t5, 44(a5) +; RV32IZCMP-NEXT: sw t4, 40(a5) +; RV32IZCMP-NEXT: sw t3, 36(a5) +; RV32IZCMP-NEXT: sw t2, 32(a5) +; RV32IZCMP-NEXT: sw t1, 28(a5) +; RV32IZCMP-NEXT: sw t0, 24(a5) +; RV32IZCMP-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 20(a5) +; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, 16(a5) +; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 80 +; +; RV64IZCMP-LABEL: callee_no_irq: +; RV64IZCMP: # %bb.0: +; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160 +; RV64IZCMP-NEXT: lui a7, %hi(var_test_irq) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: addi a5, a7, %lo(var_test_irq) +; RV64IZCMP-NEXT: lw a0, 16(a5) +; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw a0, 20(a5) +; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IZCMP-NEXT: lw t0, 24(a5) +; RV64IZCMP-NEXT: lw t1, 28(a5) +; RV64IZCMP-NEXT: lw t2, 32(a5) +; RV64IZCMP-NEXT: lw t3, 36(a5) +; RV64IZCMP-NEXT: lw t4, 40(a5) +; RV64IZCMP-NEXT: lw t5, 44(a5) +; RV64IZCMP-NEXT: lw t6, 48(a5) +; RV64IZCMP-NEXT: lw s0, 52(a5) +; RV64IZCMP-NEXT: lw s1, 56(a5) +; RV64IZCMP-NEXT: lw s2, 60(a5) +; RV64IZCMP-NEXT: lw s3, 64(a5) +; RV64IZCMP-NEXT: lw s4, 68(a5) +; RV64IZCMP-NEXT: lw s5, 72(a5) +; RV64IZCMP-NEXT: lw s6, 76(a5) +; RV64IZCMP-NEXT: lw s7, 80(a5) +; RV64IZCMP-NEXT: lw s8, 84(a5) +; RV64IZCMP-NEXT: lw s9, 88(a5) +; RV64IZCMP-NEXT: lw s10, 92(a5) +; RV64IZCMP-NEXT: lw s11, 96(a5) +; RV64IZCMP-NEXT: lw ra, 100(a5) +; RV64IZCMP-NEXT: lw a6, 104(a5) +; RV64IZCMP-NEXT: lw a4, 108(a5) +; RV64IZCMP-NEXT: lw a0, 124(a5) +; RV64IZCMP-NEXT: lw a1, 120(a5) +; RV64IZCMP-NEXT: lw a2, 116(a5) +; RV64IZCMP-NEXT: lw a3, 112(a5) +; RV64IZCMP-NEXT: sw a0, 124(a5) +; RV64IZCMP-NEXT: sw a1, 120(a5) +; RV64IZCMP-NEXT: sw a2, 116(a5) +; RV64IZCMP-NEXT: sw a3, 112(a5) +; RV64IZCMP-NEXT: sw a4, 108(a5) +; RV64IZCMP-NEXT: sw a6, 104(a5) +; RV64IZCMP-NEXT: sw ra, 100(a5) +; RV64IZCMP-NEXT: sw s11, 96(a5) +; RV64IZCMP-NEXT: sw s10, 92(a5) +; RV64IZCMP-NEXT: sw s9, 88(a5) +; 
RV64IZCMP-NEXT: sw s8, 84(a5) +; RV64IZCMP-NEXT: sw s7, 80(a5) +; RV64IZCMP-NEXT: sw s6, 76(a5) +; RV64IZCMP-NEXT: sw s5, 72(a5) +; RV64IZCMP-NEXT: sw s4, 68(a5) +; RV64IZCMP-NEXT: sw s3, 64(a5) +; RV64IZCMP-NEXT: sw s2, 60(a5) +; RV64IZCMP-NEXT: sw s1, 56(a5) +; RV64IZCMP-NEXT: sw s0, 52(a5) +; RV64IZCMP-NEXT: sw t6, 48(a5) +; RV64IZCMP-NEXT: sw t5, 44(a5) +; RV64IZCMP-NEXT: sw t4, 40(a5) +; RV64IZCMP-NEXT: sw t3, 36(a5) +; RV64IZCMP-NEXT: sw t2, 32(a5) +; RV64IZCMP-NEXT: sw t1, 28(a5) +; RV64IZCMP-NEXT: sw t0, 24(a5) +; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 20(a5) +; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, 16(a5) +; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 +; +; RV32I-LABEL: callee_no_irq: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -80 +; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: lui a7, %hi(var_test_irq) +; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV32I-NEXT: lw a0, 16(a5) +; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 20(a5) +; RV32I-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw t0, 24(a5) +; RV32I-NEXT: lw t1, 28(a5) +; RV32I-NEXT: lw t2, 32(a5) +; RV32I-NEXT: lw t3, 36(a5) +; RV32I-NEXT: lw t4, 40(a5) +; RV32I-NEXT: lw t5, 44(a5) +; RV32I-NEXT: lw t6, 48(a5) +; RV32I-NEXT: lw s0, 52(a5) +; RV32I-NEXT: lw s1, 56(a5) +; RV32I-NEXT: lw s2, 60(a5) +; RV32I-NEXT: lw s3, 64(a5) +; RV32I-NEXT: lw s4, 68(a5) +; RV32I-NEXT: lw s5, 72(a5) +; RV32I-NEXT: lw s6, 76(a5) +; RV32I-NEXT: lw s7, 80(a5) +; RV32I-NEXT: lw s8, 84(a5) +; RV32I-NEXT: lw s9, 88(a5) +; RV32I-NEXT: lw s10, 92(a5) +; RV32I-NEXT: lw s11, 96(a5) +; RV32I-NEXT: lw ra, 100(a5) +; RV32I-NEXT: lw a6, 104(a5) +; RV32I-NEXT: lw a4, 108(a5) +; RV32I-NEXT: lw a0, 124(a5) +; RV32I-NEXT: lw a1, 120(a5) +; RV32I-NEXT: lw a2, 116(a5) +; RV32I-NEXT: lw a3, 112(a5) +; RV32I-NEXT: sw a0, 124(a5) +; RV32I-NEXT: sw a1, 120(a5) +; RV32I-NEXT: sw a2, 116(a5) +; RV32I-NEXT: sw a3, 112(a5) +; RV32I-NEXT: sw a4, 108(a5) +; RV32I-NEXT: sw a6, 104(a5) +; RV32I-NEXT: sw ra, 100(a5) +; RV32I-NEXT: sw s11, 
96(a5) +; RV32I-NEXT: sw s10, 92(a5) +; RV32I-NEXT: sw s9, 88(a5) +; RV32I-NEXT: sw s8, 84(a5) +; RV32I-NEXT: sw s7, 80(a5) +; RV32I-NEXT: sw s6, 76(a5) +; RV32I-NEXT: sw s5, 72(a5) +; RV32I-NEXT: sw s4, 68(a5) +; RV32I-NEXT: sw s3, 64(a5) +; RV32I-NEXT: sw s2, 60(a5) +; RV32I-NEXT: sw s1, 56(a5) +; RV32I-NEXT: sw s0, 52(a5) +; RV32I-NEXT: sw t6, 48(a5) +; RV32I-NEXT: sw t5, 44(a5) +; RV32I-NEXT: sw t4, 40(a5) +; RV32I-NEXT: sw t3, 36(a5) +; RV32I-NEXT: sw t2, 32(a5) +; RV32I-NEXT: sw t1, 28(a5) +; RV32I-NEXT: sw t0, 24(a5) +; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 20(a5) +; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, 16(a5) +; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 80 +; RV32I-NEXT: ret +; +; RV64I-LABEL: callee_no_irq: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -160 +; RV64I-NEXT: sd ra, 152(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 144(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 136(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 128(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 104(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 96(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 88(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 80(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s9, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s10, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s11, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: lui a7, %hi(var_test_irq) +; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV64I-NEXT: lw a0, 16(a5) +; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 20(a5) +; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw t0, 24(a5) +; RV64I-NEXT: lw t1, 28(a5) +; RV64I-NEXT: lw t2, 32(a5) +; RV64I-NEXT: lw t3, 36(a5) +; RV64I-NEXT: lw t4, 40(a5) +; RV64I-NEXT: lw t5, 44(a5) +; RV64I-NEXT: lw t6, 48(a5) +; RV64I-NEXT: lw s0, 52(a5) +; RV64I-NEXT: lw s1, 56(a5) +; RV64I-NEXT: lw s2, 60(a5) +; RV64I-NEXT: lw s3, 64(a5) +; RV64I-NEXT: lw s4, 68(a5) +; 
RV64I-NEXT: lw s5, 72(a5) +; RV64I-NEXT: lw s6, 76(a5) +; RV64I-NEXT: lw s7, 80(a5) +; RV64I-NEXT: lw s8, 84(a5) +; RV64I-NEXT: lw s9, 88(a5) +; RV64I-NEXT: lw s10, 92(a5) +; RV64I-NEXT: lw s11, 96(a5) +; RV64I-NEXT: lw ra, 100(a5) +; RV64I-NEXT: lw a6, 104(a5) +; RV64I-NEXT: lw a4, 108(a5) +; RV64I-NEXT: lw a0, 124(a5) +; RV64I-NEXT: lw a1, 120(a5) +; RV64I-NEXT: lw a2, 116(a5) +; RV64I-NEXT: lw a3, 112(a5) +; RV64I-NEXT: sw a0, 124(a5) +; RV64I-NEXT: sw a1, 120(a5) +; RV64I-NEXT: sw a2, 116(a5) +; RV64I-NEXT: sw a3, 112(a5) +; RV64I-NEXT: sw a4, 108(a5) +; RV64I-NEXT: sw a6, 104(a5) +; RV64I-NEXT: sw ra, 100(a5) +; RV64I-NEXT: sw s11, 96(a5) +; RV64I-NEXT: sw s10, 92(a5) +; RV64I-NEXT: sw s9, 88(a5) +; RV64I-NEXT: sw s8, 84(a5) +; RV64I-NEXT: sw s7, 80(a5) +; RV64I-NEXT: sw s6, 76(a5) +; RV64I-NEXT: sw s5, 72(a5) +; RV64I-NEXT: sw s4, 68(a5) +; RV64I-NEXT: sw s3, 64(a5) +; RV64I-NEXT: sw s2, 60(a5) +; RV64I-NEXT: sw s1, 56(a5) +; RV64I-NEXT: sw s0, 52(a5) +; RV64I-NEXT: sw t6, 48(a5) +; RV64I-NEXT: sw t5, 44(a5) +; RV64I-NEXT: sw t4, 40(a5) +; RV64I-NEXT: sw t3, 36(a5) +; RV64I-NEXT: sw t2, 32(a5) +; RV64I-NEXT: sw t1, 28(a5) +; RV64I-NEXT: sw t0, 24(a5) +; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 20(a5) +; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, 16(a5) +; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 144(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 136(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 128(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 96(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 88(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 80(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s9, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s10, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s11, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 160 +; RV64I-NEXT: ret + %val = load [32 x i32], [32 x i32]* @var_test_irq + store volatile [32 x i32] %val, [32 x i32]* @var_test_irq + ret void +} diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -187,3 +187,12 @@ .attribute arch, "rv32if_zkt1p0_zve32f1p0_zve32x1p0_zvl32b1p0" # CHECK: attribute 5, "rv32i2p0_f2p0_zkt1p0_zve32f1p0_zve32x1p0_zvl32b1p0" + +.attribute arch, "rv32izca0p70" +# CHECK: attribute 5, "rv32i2p0_zca0p70" + +.attribute arch, "rv32izcb0p70" +# CHECK: attribute 5, "rv32i2p0_zcb0p70" + +.attribute arch, "rv32izcmp0p70" +# CHECK: attribute 5, "rv32i2p0_zcmp0p70" diff --git a/llvm/test/MC/RISCV/rv32c-only-valid.s b/llvm/test/MC/RISCV/rv32c-only-valid.s --- a/llvm/test/MC/RISCV/rv32c-only-valid.s +++ b/llvm/test/MC/RISCV/rv32c-only-valid.s @@ -19,5 +19,5 @@ # CHECK-ASM: encoding: [0xfd,0x2f] # CHECK-NO-EXT: error: instruction requires the following: 'C' (Compressed Instructions) # CHECK-NO-RV32: error: instruction requires the following: RV32I Base 
Instruction Set -# CHECK-NO-RV32-AND-EXT: error: instruction requires the following: 'C' (Compressed Instructions), RV32I Base Instruction Set +# CHECK-NO-RV32-AND-EXT: error: instruction requires the following: 'C' (Compressed Instructions) or 'Zca' (part of the C extension), RV32I Base Instruction Set c.jal 2046 diff --git a/llvm/test/MC/RISCV/rv32zca-aliases-valid.s b/llvm/test/MC/RISCV/rv32zca-aliases-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zca-aliases-valid.s @@ -0,0 +1,66 @@ +# RUN: llvm-mc -triple=riscv32 -mattr=+experimental-zca -riscv-no-aliases < %s \ +# RUN: | FileCheck -check-prefixes=CHECK-EXPAND,CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+experimental-zca < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zca -d -M no-aliases - \ +# RUN: | FileCheck -check-prefixes=CHECK-EXPAND,CHECK-INST %s + +# The following check prefixes are used in this test: +# CHECK-INST.....Match the canonical instr (tests alias to instr. mapping) +# CHECK-EXPAND...Match canonical instr. unconditionally (tests alias expansion) +# CHECK-INST: {{^}} + +# CHECK-EXPAND: c.li a0, 0 +li x10, 0 +# CHECK-EXPAND: c.li a0, 1 +li x10, 1 +# CHECK-EXPAND: c.li a0, -1 +li x10, -1 +# CHECK-EXPAND: addi a0, zero, 2047 +li x10, 2047 +# CHECK-EXPAND: addi a0, zero, -2047 +li x10, -2047 +# CHECK-EXPAND: c.lui a1, 1 +# CHECK-EXPAND: addi a1, a1, -2048 +li x11, 2048 +# CHECK-EXPAND: addi a1, zero, -2048 +li x11, -2048 +# CHECK-EXPAND: c.lui a1, 1 +# CHECK-EXPAND: addi a1, a1, -2047 +li x11, 2049 +# CHECK-EXPAND: lui a1, 1048575 +# CHECK-EXPAND: addi a1, a1, 2047 +li x11, -2049 +# CHECK-EXPAND: c.lui a1, 1 +# CHECK-EXPAND: c.addi a1, -1 +li x11, 4095 +# CHECK-EXPAND: lui a1, 1048575 +# CHECK-EXPAND: c.addi a1, 1 +li x11, -4095 +# CHECK-EXPAND: c.lui a2, 1 +li x12, 4096 +# CHECK-EXPAND: lui a2, 1048575 +li x12, -4096 +# CHECK-EXPAND: c.lui a2, 1 +# CHECK-EXPAND: c.addi a2, 1 +li x12, 4097 +# CHECK-EXPAND: lui a2, 1048575 +# CHECK-EXPAND: c.addi a2, -1 +li x12, -4097 +# CHECK-EXPAND: lui a2, 524288 +# CHECK-EXPAND: c.addi a2, -1 +li x12, 2147483647 +# CHECK-EXPAND: lui a2, 524288 +# CHECK-EXPAND: c.addi a2, 1 +li x12, -2147483647 +# CHECK-EXPAND: lui a2, 524288 +li x12, -2147483648 +# CHECK-EXPAND: lui a2, 524288 +li x12, -0x80000000 + +# CHECK-EXPAND: lui a2, 524288 +li x12, 0x80000000 +# CHECK-EXPAND: c.li a2, -1 +li x12, 0xFFFFFFFF + +# CHECK-EXPAND: c.mv sp, sp +addi x2, x2, 0 diff --git a/llvm/test/MC/RISCV/rv32zca-invalid.s b/llvm/test/MC/RISCV/rv32zca-invalid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zca-invalid.s @@ -0,0 +1,93 @@ +# RUN: not llvm-mc -triple=riscv32 -mattr=+experimental-zca -mattr=+no-rvc-hints < %s 2>&1 \ +# RUN: | FileCheck %s + +## GPRC +.LBB: +c.lw ra, 4(sp) # CHECK: :[[@LINE]]:7: error: invalid operand for instruction +c.sw sp, 4(sp) # CHECK: :[[@LINE]]:7: error: invalid operand for instruction +c.beqz t0, .LBB # CHECK: :[[@LINE]]:9: error: invalid operand for instruction +c.bnez s8, .LBB # CHECK: :[[@LINE]]:9: error: invalid operand for instruction +c.addi4spn s4, sp, 12 # CHECK: :[[@LINE]]:13: error: invalid operand for instruction +c.srli s7, 12 # CHECK: :[[@LINE]]:9: error: invalid operand for instruction +c.srai t0, 12 # CHECK: :[[@LINE]]:9: error: invalid operand for instruction +c.andi t1, 12 # CHECK: :[[@LINE]]:9: error: invalid operand for instruction +c.and t1, a0 # CHECK: :[[@LINE]]:8: error: invalid operand for instruction +c.or a0, s8 # CHECK: :[[@LINE]]:12: error: invalid operand for instruction +c.xor 
t2, a0 # CHECK: :[[@LINE]]:8: error: invalid operand for instruction +c.sub a0, s8 # CHECK: :[[@LINE]]:12: error: invalid operand for instruction + +## GPRNoX0 +c.lwsp x0, 4(sp) # CHECK: :[[@LINE]]:9: error: invalid operand for instruction +c.lwsp zero, 4(sp) # CHECK: :[[@LINE]]:9: error: invalid operand for instruction +c.jr x0 # CHECK: :[[@LINE]]:7: error: invalid operand for instruction +c.jalr zero # CHECK: :[[@LINE]]:9: error: invalid operand for instruction +c.addi x0, x0, 1 # CHECK: :[[@LINE]]:13: error: immediate must be zero +c.li zero, 2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RVC Hint Instructions +c.slli zero, zero, 4 # CHECK: :[[@LINE]]:15: error: invalid operand for instruction +c.mv zero, s0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RVC Hint Instructions +c.mv ra, x0 # CHECK: :[[@LINE]]:11: error: invalid operand for instruction +c.add ra, ra, x0 # CHECK: :[[@LINE]]:16: error: invalid operand for instruction +c.add zero, zero, sp # CHECK: :[[@LINE]]:14: error: invalid operand for instruction + +## GPRNoX0X2 +c.lui x0, 4 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RVC Hint Instructions +c.lui x2, 4 # CHECK: :[[@LINE]]:7: error: invalid operand for instruction + +## SP +c.addi4spn a0, a0, 12 # CHECK: :[[@LINE]]:17: error: invalid operand for instruction +c.addi16sp t0, 16 # CHECK: :[[@LINE]]:13: error: invalid operand for instruction + +# Out of range immediates + +## uimmlog2xlennonzero +c.slli t0, 64 # CHECK: :[[@LINE]]:12: error: immediate must be an integer in the range [1, 31] +c.srli a0, 32 # CHECK: :[[@LINE]]:12: error: immediate must be an integer in the range [1, 31] +c.srai a0, 0 # CHECK: :[[@LINE]]:12: error: immediate must be an integer in the range [1, 31] + +## simm6 +c.li t0, 128 # CHECK: :[[@LINE]]:10: error: immediate must be an integer in the range [-32, 31] +c.li t0, foo # CHECK: :[[@LINE]]:10: error: immediate must be an integer in the range [-32, 31] +c.li t0, %lo(foo) # CHECK: :[[@LINE]]:10: error: immediate must be an integer in the range [-32, 31] +c.li t0, %hi(foo) # CHECK: :[[@LINE]]:10: error: immediate must be an integer in the range [-32, 31] +c.andi a0, -33 # CHECK: :[[@LINE]]:12: error: immediate must be an integer in the range [-32, 31] +c.andi a0, foo # CHECK: :[[@LINE]]:12: error: immediate must be an integer in the range [-32, 31] +c.andi a0, %lo(foo) # CHECK: :[[@LINE]]:12: error: immediate must be an integer in the range [-32, 31] +c.andi a0, %hi(foo) # CHECK: :[[@LINE]]:12: error: immediate must be an integer in the range [-32, 31] + +## simm6nonzero +c.addi t0, 0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RVC Hint Instructions +c.addi t0, -33 # CHECK: :[[@LINE]]:12: error: immediate must be non-zero in the range [-32, 31] +c.addi t0, 32 # CHECK: :[[@LINE]]:12: error: immediate must be non-zero in the range [-32, 31] +c.addi t0, foo # CHECK: :[[@LINE]]:12: error: immediate must be non-zero in the range [-32, 31] +c.addi t0, %lo(foo) # CHECK: :[[@LINE]]:12: error: immediate must be non-zero in the range [-32, 31] +c.addi t0, %hi(foo) # CHECK: :[[@LINE]]:12: error: immediate must be non-zero in the range [-32, 31] + +## c_lui_imm +c.lui t0, 0 # CHECK: :[[@LINE]]:11: error: immediate must be in [0xfffe0, 0xfffff] or [1, 31] +c.lui t0, 32 # CHECK: :[[@LINE]]:11: error: immediate must be in [0xfffe0, 0xfffff] or [1, 31] +c.lui t0, 0xffffdf # CHECK: :[[@LINE]]:11: error: immediate must be in [0xfffe0, 0xfffff] or [1, 31] +c.lui t0, 0x1000000 # CHECK: 
:[[@LINE]]:11: error: immediate must be in [0xfffe0, 0xfffff] or [1, 31] + +## uimm8_lsb00 +c.lwsp ra, 256(sp) # CHECK: :[[@LINE]]:13: error: immediate must be a multiple of 4 bytes in the range [0, 252] +c.swsp ra, -4(sp) # CHECK: :[[@LINE]]:13: error: immediate must be a multiple of 4 bytes in the range [0, 252] +## uimm7_lsb00 +c.lw s0, -4(sp) # CHECK: :[[@LINE]]:11: error: immediate must be a multiple of 4 bytes in the range [0, 124] +c.sw s0, 128(sp) # CHECK: :[[@LINE]]:11: error: immediate must be a multiple of 4 bytes in the range [0, 124] + +## simm9_lsb0 +c.bnez s1, -258 # CHECK: :[[@LINE]]:13: error: immediate must be a multiple of 2 bytes in the range [-256, 254] +c.beqz a0, 256 # CHECK: :[[@LINE]]:13: error: immediate must be a multiple of 2 bytes in the range [-256, 254] + +## simm12_lsb0 +c.j 2048 # CHECK: :[[@LINE]]:5: error: immediate must be a multiple of 2 bytes in the range [-2048, 2046] +c.jal -2050 # CHECK: :[[@LINE]]:7: error: immediate must be a multiple of 2 bytes in the range [-2048, 2046] + +## uimm10_lsb00nonzero +c.addi4spn a0, sp, 0 # CHECK: :[[@LINE]]:21: error: immediate must be a multiple of 4 bytes in the range [4, 1020] +c.addi4spn a0, sp, 1024 # CHECK: :[[@LINE]]:21: error: immediate must be a multiple of 4 bytes in the range [4, 1020] + +## simm10_lsb0000nonzero +c.addi16sp sp, -528 # CHECK: :[[@LINE]]:17: error: immediate must be a multiple of 16 bytes and non-zero in the range [-512, 496] +c.addi16sp sp, 512 # CHECK: :[[@LINE]]:17: error: immediate must be a multiple of 16 bytes and non-zero in the range [-512, 496] +c.addi16sp sp, 0 # CHECK: :[[@LINE]]:17: error: immediate must be a multiple of 16 bytes and non-zero in the range [-512, 496] diff --git a/llvm/test/MC/RISCV/rv32zca-valid.s b/llvm/test/MC/RISCV/rv32zca-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zca-valid.s @@ -0,0 +1,111 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zca -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+experimental-zca < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zca -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefixes=CHECK-OBJ,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zca -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zca < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zca -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# TODO: more exhaustive testing of immediate encoding. 
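As a cross-check on the stanzas below: Zca reuses the standard RVC field layout, so the first two encodings can be reproduced by hand. A minimal packing sketch (the encodeCLwsp/encodeCSwsp helpers are invented for illustration, not LLVM API):

#include <cassert>
#include <cstdint>

// c.lwsp is CI-format: funct3=010 at [15:13], uimm[5] at bit 12,
// rd at [11:7], uimm[4:2] at [6:4], uimm[7:6] at [3:2], quadrant 0b10.
static uint16_t encodeCLwsp(unsigned Rd, unsigned Uimm) {
  return (0b010u << 13) | (((Uimm >> 5) & 1) << 12) | (Rd << 7) |
         (((Uimm >> 2) & 7) << 4) | (((Uimm >> 6) & 3) << 2) | 0b10;
}

// c.swsp is CSS-format: funct3=110, uimm[5:2] at [12:9],
// uimm[7:6] at [8:7], rs2 at [6:2], quadrant 0b10.
static uint16_t encodeCSwsp(unsigned Rs2, unsigned Uimm) {
  return (0b110u << 13) | (((Uimm >> 2) & 0xf) << 9) |
         (((Uimm >> 6) & 3) << 7) | (Rs2 << 2) | 0b10;
}

int main() {
  assert(encodeCLwsp(/*ra=*/1, 0) == 0x4082);   // bytes [0x82,0x40]
  assert(encodeCSwsp(/*ra=*/1, 252) == 0xdf86); // bytes [0x86,0xdf]
  return 0;
}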
+ +# CHECK-ASM-AND-OBJ: c.lwsp ra, 0(sp) +# CHECK-ASM: encoding: [0x82,0x40] +c.lwsp ra, 0(sp) +# CHECK-ASM-AND-OBJ: c.swsp ra, 252(sp) +# CHECK-ASM: encoding: [0x86,0xdf] +c.swsp ra, 252(sp) +# CHECK-ASM-AND-OBJ: c.lw a2, 0(a0) +# CHECK-ASM: encoding: [0x10,0x41] +c.lw a2, 0(a0) +# CHECK-ASM-AND-OBJ: c.sw a5, 124(a3) +# CHECK-ASM: encoding: [0xfc,0xde] +c.sw a5, 124(a3) + +# CHECK-OBJ: c.j 0xfffff808 +# CHECK-ASM: c.j -2048 +# CHECK-ASM: encoding: [0x01,0xb0] +c.j -2048 +# CHECK-ASM-AND-OBJ: c.jr a7 +# CHECK-ASM: encoding: [0x82,0x88] +c.jr a7 +# CHECK-ASM-AND-OBJ: c.jalr a1 +# CHECK-ASM: encoding: [0x82,0x95] +c.jalr a1 +# CHECK-OBJ: c.beqz a3, 0xffffff0e +# CHECK-ASM: c.beqz a3, -256 +# CHECK-ASM: encoding: [0x81,0xd2] +c.beqz a3, -256 +# CHECK-OBJ: c.bnez a5, 0x10e +# CHECK-ASM: c.bnez a5, 254 +# CHECK-ASM: encoding: [0xfd,0xef] +c.bnez a5, 254 + +# CHECK-ASM-AND-OBJ: c.li a7, 31 +# CHECK-ASM: encoding: [0xfd,0x48] +c.li a7, 31 +# CHECK-ASM-AND-OBJ: c.addi a3, -32 +# CHECK-ASM: encoding: [0x81,0x16] +c.addi a3, -32 +# CHECK-ASM-AND-OBJ: c.addi16sp sp, -512 +# CHECK-ASM: encoding: [0x01,0x71] +c.addi16sp sp, -512 +# CHECK-ASM-AND-OBJ: c.addi16sp sp, 496 +# CHECK-ASM: encoding: [0x7d,0x61] +c.addi16sp sp, 496 +# CHECK-ASM-AND-OBJ: c.addi4spn a3, sp, 1020 +# CHECK-ASM: encoding: [0xf4,0x1f] +c.addi4spn a3, sp, 1020 +# CHECK-ASM-AND-OBJ: c.addi4spn a3, sp, 4 +# CHECK-ASM: encoding: [0x54,0x00] +c.addi4spn a3, sp, 4 +# CHECK-ASM-AND-OBJ: c.slli a1, 1 +# CHECK-ASM: encoding: [0x86,0x05] +c.slli a1, 1 +# CHECK-ASM-AND-OBJ: c.srli a3, 31 +# CHECK-ASM: encoding: [0xfd,0x82] +c.srli a3, 31 +# CHECK-ASM-AND-OBJ: c.srai a4, 2 +# CHECK-ASM: encoding: [0x09,0x87] +c.srai a4, 2 +# CHECK-ASM-AND-OBJ: c.andi a5, 15 +# CHECK-ASM: encoding: [0xbd,0x8b] +c.andi a5, 15 +# CHECK-ASM-AND-OBJ: c.mv a7, s0 +# CHECK-ASM: encoding: [0xa2,0x88] +c.mv a7, s0 +# CHECK-ASM-AND-OBJ: c.and a1, a2 +# CHECK-ASM: encoding: [0xf1,0x8d] +c.and a1, a2 +# CHECK-ASM-AND-OBJ: c.or a2, a3 +# CHECK-ASM: encoding: [0x55,0x8e] +c.or a2, a3 +# CHECK-ASM-AND-OBJ: c.xor a3, a4 +# CHECK-ASM: encoding: [0xb9,0x8e] +c.xor a3, a4 +# CHECK-ASM-AND-OBJ: c.sub a4, a5 +# CHECK-ASM: encoding: [0x1d,0x8f] +c.sub a4, a5 +# CHECK-ASM-AND-OBJ: c.nop +# CHECK-ASM: encoding: [0x01,0x00] +c.nop +# CHECK-ASM-AND-OBJ: c.ebreak +# CHECK-ASM: encoding: [0x02,0x90] +c.ebreak +# CHECK-ASM-AND-OBJ: c.lui s0, 1 +# CHECK-ASM: encoding: [0x05,0x64] +c.lui s0, 1 +# CHECK-ASM-AND-OBJ: c.lui s0, 31 +# CHECK-ASM: encoding: [0x7d,0x64] +c.lui s0, 31 +# CHECK-ASM-AND-OBJ: c.lui s0, 1048544 +# CHECK-ASM: encoding: [0x01,0x74] +c.lui s0, 0xfffe0 +# CHECK-ASM-AND-OBJ: c.lui s0, 1048575 +# CHECK-ASM: encoding: [0x7d,0x74] +c.lui s0, 0xfffff +# CHECK-ASM-AND-OBJ: c.unimp +# CHECK-ASM: encoding: [0x00,0x00] +c.unimp diff --git a/llvm/test/MC/RISCV/rv32zcb-Invalid.s b/llvm/test/MC/RISCV/rv32zcb-Invalid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zcb-Invalid.s @@ -0,0 +1,17 @@ +# RUN: not llvm-mc -triple=riscv32 -mattr=experimental-zcb -riscv-no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-ERROR %s + +# CHECK-ERROR: error: immediate must be an integer in the range [0, 3] +c.lbu a5, 10(a4) + +# CHECK-ERROR: error: immediate must be one of [0, 2] +c.lhu a5, 10(a4) + +# CHECK-ERROR: error: immediate must be one of [0, 2] +c.lh a5, 10(a4) + +# CHECK-ERROR: error: immediate must be an integer in the range [0, 3] +c.sb a5, 10(a4) + +# CHECK-ERROR: error: immediate must be one of [0, 2] +c.sh a5, 10(a4) \ No newline at end of 
file diff --git a/llvm/test/MC/RISCV/rv32zcb-valid.s b/llvm/test/MC/RISCV/rv32zcb-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zcb-valid.s @@ -0,0 +1,65 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=experimental-zcb -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=experimental-zcb -mattr=m < %s \ +# RUN: | llvm-objdump --mattr=experimental-zcb --mattr=m -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: c.lbu a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x83] +c.lbu a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.lhu a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x87] +c.lhu a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.lh a5, 2(a4) +# CHECK-ASM: encoding: [0x7c,0x87] +c.lh a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.sb a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x8b] +c.sb a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.sh a5, 2(a4) +# CHECK-ASM: encoding: [0x7c,0x8f] +c.sh a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.lh a5, 2(a4) +# CHECK-ASM: encoding: [0x7c,0x87] +lh a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.lbu a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x83] +lbu a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.lhu a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x87] +lhu a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.sb a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x8b] +sb a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.sh a5, 2(a4) +# CHECK-ASM: encoding: [0x7c,0x8f] +sh a5, 2(a4) + +# CHECK-ASM-AND-OBJ: lh a5, 3(a4) +# CHECK-ASM: encoding: [0x83,0x17,0x37,0x00] +lh a5, 3(a4) + +# CHECK-ASM-AND-OBJ: lbu a5, 4(a4) +# CHECK-ASM: encoding: [0x83,0x47,0x47,0x00] +lbu a5, 4(a4) + +# CHECK-ASM-AND-OBJ: lhu a5, 3(a4) +# CHECK-ASM: encoding: [0x83,0x57,0x37,0x00] +lhu a5, 3(a4) + +# CHECK-ASM-AND-OBJ: sb a5, 4(a4) +# CHECK-ASM: encoding: [0x23,0x02,0xf7,0x00] +sb a5, 4(a4) + +# CHECK-ASM-AND-OBJ: sh a5, 3(a4) +# CHECK-ASM: encoding: [0xa3,0x11,0xf7,0x00] +sh a5, 3(a4) \ No newline at end of file diff --git a/llvm/test/MC/RISCV/rv32zcmp-Invalid.s b/llvm/test/MC/RISCV/rv32zcmp-Invalid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zcmp-Invalid.s @@ -0,0 +1,14 @@ +# RUN: not llvm-mc -triple=riscv32 -mattr=experimental-zcmp -riscv-no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-ERROR %s + +# CHECK-ERROR: error: invalid operand for instruction +cm.mvsa01 a1, a2 + +# CHECK-ERROR: error: invalid operand for instruction +cm.mva01s a1, a2 + +# CHECK-ERROR: error: invalid register list, {ra, s0-s10} is not supported. +cm.popretz {ra, s0-s10}, 112 + +# CHECK-ERROR: error: This stack adjustment is invalid for this instruction and register list, Please refer to Zce spec for a detailed range of stack adjustment. 
+cm.popretz {ra, s0-s1}, 112 \ No newline at end of file diff --git a/llvm/test/MC/RISCV/rv32zcmp-valid.s b/llvm/test/MC/RISCV/rv32zcmp-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zcmp-valid.s @@ -0,0 +1,285 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=experimental-zcmp -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=experimental-zcmp < %s \ +# RUN: | llvm-objdump --mattr=-c,experimental-zcmp -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: cm.mvsa01 s1, s0 +# CHECK-ASM: encoding: [0xa2,0xac] +cm.mvsa01 s1, s0 + +# CHECK-ASM-AND-OBJ: cm.mva01s s1, s0 +# CHECK-ASM: encoding: [0xe2,0xac] +cm.mva01s s1, s0 + +# CHECK-ASM-AND-OBJ: cm.popret {ra}, 16 +# CHECK-ASM: encoding: [0x42,0xbe] +cm.popret {ra}, 16 + +# CHECK-ASM-AND-OBJ: cm.popret {ra}, 16 +# CHECK-ASM: encoding: [0x42,0xbe] +cm.popret {x1}, 16 + +# CHECK-ASM-AND-OBJ: cm.popret {ra}, 32 +# CHECK-ASM: encoding: [0x46,0xbe] +cm.popret {ra}, 32 + +# CHECK-ASM-AND-OBJ: cm.popret {ra}, 32 +# CHECK-ASM: encoding: [0x46,0xbe] +cm.popret {x1}, 32 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0}, 64 +# CHECK-ASM: encoding: [0x5e,0xbe] +cm.popret {ra, s0}, 64 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0}, 64 +# CHECK-ASM: encoding: [0x5e,0xbe] +cm.popret {x1, x8}, 64 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s1}, 16 +# CHECK-ASM: encoding: [0x62,0xbe] +cm.popret {ra,s0-s1}, 16 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s1}, 16 +# CHECK-ASM: encoding: [0x62,0xbe] +cm.popret {x1, x8-x9}, 16 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s2}, 32 +# CHECK-ASM: encoding: [0x76,0xbe] +cm.popret {ra, s0-s2}, 32 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s2}, 32 +# CHECK-ASM: encoding: [0x76,0xbe] +cm.popret {x1, x8-x9, x18}, 32 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s3}, 32 +# CHECK-ASM: encoding: [0x82,0xbe] +cm.popret {ra, s0-s3}, 32 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s3}, 32 +# CHECK-ASM: encoding: [0x82,0xbe] +cm.popret {x1, x8-x9, x18-x19}, 32 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s5}, 32 +# CHECK-ASM: encoding: [0xa2,0xbe] +cm.popret {ra, s0-s5}, 32 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s5}, 32 +# CHECK-ASM: encoding: [0xa2,0xbe] +cm.popret {x1, x8-x9, x18-x21}, 32 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s7}, 48 +# CHECK-ASM: encoding: [0xc2,0xbe] +cm.popret {ra, s0-s7}, 48 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s7}, 48 +# CHECK-ASM: encoding: [0xc2,0xbe] +cm.popret {x1, x8-x9, x18-x23}, 48 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s11}, 112 +# CHECK-ASM: encoding: [0xfe,0xbe] +cm.popret {ra, s0-s11}, 112 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s11}, 112 +# CHECK-ASM: encoding: [0xfe,0xbe] +cm.popret {x1, x8-x9, x18-x27}, 112 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra}, 16 +# CHECK-ASM: encoding: [0x42,0xbc] +cm.popretz {ra}, 16 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra}, 16 +# CHECK-ASM: encoding: [0x42,0xbc] +cm.popretz {x1}, 16 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra}, 32 +# CHECK-ASM: encoding: [0x46,0xbc] +cm.popretz {ra}, 32 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra}, 32 +# CHECK-ASM: encoding: [0x46,0xbc] +cm.popretz {x1}, 32 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0}, 64 +# CHECK-ASM: encoding: [0x5e,0xbc] +cm.popretz {ra, s0}, 64 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0}, 64 +# CHECK-ASM: encoding: [0x5e,0xbc] +cm.popretz {x1, x8}, 64 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s1}, 16 +# CHECK-ASM: encoding: [0x62,0xbc] +cm.popretz {ra, s0-s1}, 16 + 
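Every cm.* encoding in this file follows one packing rule: bits [15:13] are 101, bits [12:8] select push (11000), pop (11010), popretz (11100) or popret (11110), rlist sits in [7:4], spimm in [3:2], and [1:0] are 0b10. A minimal sketch for cm.popret, assuming the Zce 0.70 rule that the base stack adjustment is the saved-register bytes rounded up to a multiple of 16 (the helper name is invented for illustration):

#include <cassert>
#include <cstdint>

// rlist encodes the register list: 4={ra}, 5={ra,s0}, ..., 14={ra,s0-s9},
// 15={ra,s0-s11} (13 regs). {ra,s0-s10} has no encoding, which is what
// the rv32zcmp-Invalid.s case above exercises.
static uint16_t encodeCmPopret(unsigned Rlist, unsigned StackAdj,
                               bool IsRV64) {
  unsigned NumRegs = Rlist == 15 ? 13 : Rlist - 3;
  unsigned Base = (NumRegs * (IsRV64 ? 8 : 4) + 15) / 16 * 16;
  unsigned Spimm = (StackAdj - Base) / 16;
  assert(Spimm <= 3 && (StackAdj - Base) % 16 == 0 &&
         "otherwise the InvalidSpimm diagnostic fires");
  return (0xbeu << 8) | (Rlist << 4) | (Spimm << 2) | 0b10;
}

int main() {
  assert(encodeCmPopret(4, 16, false) == 0xbe42);   // cm.popret {ra}, 16
  assert(encodeCmPopret(15, 112, false) == 0xbefe); // {ra, s0-s11}, 112
  return 0;
}

Under the same rule, {ra, s0-s1} saves three registers, so the RV32 base is 16 and the largest legal adjustment is 16 + 3*16 = 64; that is why 112 was rejected in the -Invalid test.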
+# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s1}, 16 +# CHECK-ASM: encoding: [0x62,0xbc] +cm.popretz {x1, x8-x9}, 16 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s2}, 32 +# CHECK-ASM: encoding: [0x76,0xbc] +cm.popretz {ra, s0-s2}, 32 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s2}, 32 +# CHECK-ASM: encoding: [0x76,0xbc] +cm.popretz {x1, x8-x9, x18}, 32 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s3}, 32 +# CHECK-ASM: encoding: [0x82,0xbc] +cm.popretz {ra, s0-s3}, 32 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s3}, 32 +# CHECK-ASM: encoding: [0x82,0xbc] +cm.popretz {x1, x8-x9, x18-x19}, 32 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s5}, 32 +# CHECK-ASM: encoding: [0xa2,0xbc] +cm.popretz {ra, s0-s5}, 32 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s5}, 32 +# CHECK-ASM: encoding: [0xa2,0xbc] +cm.popretz {x1, x8-x9, x18-x21}, 32 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s7}, 48 +# CHECK-ASM: encoding: [0xc2,0xbc] +cm.popretz {ra, s0-s7}, 48 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s7}, 48 +# CHECK-ASM: encoding: [0xc2,0xbc] +cm.popretz {x1, x8-x9, x18-x23}, 48 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s11}, 112 +# CHECK-ASM: encoding: [0xfe,0xbc] +cm.popretz {ra, s0-s11}, 112 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s11}, 112 +# CHECK-ASM: encoding: [0xfe,0xbc] +cm.popretz {x1, x8-x9, x18-x27}, 112 + +# CHECK-ASM-AND-OBJ: cm.pop {ra}, 16 +# CHECK-ASM: encoding: [0x42,0xba] +cm.pop {ra}, 16 + +# CHECK-ASM-AND-OBJ: cm.pop {ra}, 16 +# CHECK-ASM: encoding: [0x42,0xba] +cm.pop {x1}, 16 + +# CHECK-ASM-AND-OBJ: cm.pop {ra}, 32 +# CHECK-ASM: encoding: [0x46,0xba] +cm.pop {ra}, 32 + +# CHECK-ASM-AND-OBJ: cm.pop {ra}, 32 +# CHECK-ASM: encoding: [0x46,0xba] +cm.pop {x1}, 32 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0}, 16 +# CHECK-ASM: encoding: [0x52,0xba] +cm.pop {ra, s0}, 16 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0}, 16 +# CHECK-ASM: encoding: [0x52,0xba] +cm.pop {x1, x8}, 16 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s1}, 32 +# CHECK-ASM: encoding: [0x66,0xba] +cm.pop {ra, s0-s1}, 32 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s1}, 32 +# CHECK-ASM: encoding: [0x66,0xba] +cm.pop {x1, x8-x9}, 32 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s2}, 32 +# CHECK-ASM: encoding: [0x76,0xba] +cm.pop {ra, s0-s2}, 32 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s2}, 32 +# CHECK-ASM: encoding: [0x76,0xba] +cm.pop {x1, x8-x9, x18}, 32 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s5}, 32 +# CHECK-ASM: encoding: [0xa2,0xba] +cm.pop {ra, s0-s5}, 32 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s5}, 32 +# CHECK-ASM: encoding: [0xa2,0xba] +cm.pop {x1, x8-x9, x18-x21}, 32 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s7}, 48 +# CHECK-ASM: encoding: [0xc2,0xba] +cm.pop {ra, s0-s7}, 48 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s7}, 48 +# CHECK-ASM: encoding: [0xc2,0xba] +cm.pop {x1, x8-x9, x18-x23}, 48 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s11}, 64 +# CHECK-ASM: encoding: [0xf2,0xba] +cm.pop {ra, s0-s11}, 64 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s11}, 64 +# CHECK-ASM: encoding: [0xf2,0xba] +cm.pop {x1, x8-x9, x18-x27}, 64 + +# CHECK-ASM-AND-OBJ: cm.push {ra}, -16 +# CHECK-ASM: encoding: [0x42,0xb8] +cm.push {ra}, -16 + +# CHECK-ASM-AND-OBJ: cm.push {ra}, -16 +# CHECK-ASM: encoding: [0x42,0xb8] +cm.push {x1}, -16 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0}, -32 +# CHECK-ASM: encoding: [0x56,0xb8] +cm.push {ra, s0}, -32 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0}, -32 +# CHECK-ASM: encoding: [0x56,0xb8] +cm.push {x1, x8}, -32 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s1}, -16 +# CHECK-ASM: encoding: [0x62,0xb8] +cm.push {ra, s0-s1}, -16 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s1}, -16 
+# CHECK-ASM: encoding: [0x62,0xb8] +cm.push {x1, x8-x9}, -16 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s3}, -32 +# CHECK-ASM: encoding: [0x82,0xb8] +cm.push {ra, s0-s3}, -32 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s3}, -32 +# CHECK-ASM: encoding: [0x82,0xb8] +cm.push {x1, x8-x9, x18-x19}, -32 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s7}, -48 +# CHECK-ASM: encoding: [0xc2,0xb8] +cm.push {ra, s0-s7}, -48 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s7}, -48 +# CHECK-ASM: encoding: [0xc2,0xb8] +cm.push {x1, x8-x9, x18-x23}, -48 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s7}, -64 +# CHECK-ASM: encoding: [0xc6,0xb8] +cm.push {ra, s0-s7}, -64 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s7}, -64 +# CHECK-ASM: encoding: [0xc6,0xb8] +cm.push {x1, x8-x9, x18-x23}, -64 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s11}, -80 +# CHECK-ASM: encoding: [0xf6,0xb8] +cm.push {ra, s0-s11}, -80 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s11}, -80 +# CHECK-ASM: encoding: [0xf6,0xb8] +cm.push {x1, x8-x9, x18-x27}, -80 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s11}, -112 +# CHECK-ASM: encoding: [0xfe,0xb8] +cm.push {ra, s0-s11}, -112 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s11}, -112 +# CHECK-ASM: encoding: [0xfe,0xb8] +cm.push {x1, x8-x9, x18-x27}, -112 diff --git a/llvm/test/MC/RISCV/rv64zca-aliases-valid.s b/llvm/test/MC/RISCV/rv64zca-aliases-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zca-aliases-valid.s @@ -0,0 +1,105 @@ +# RUN: llvm-mc -triple=riscv64 -mattr=+experimental-zca -riscv-no-aliases < %s \ +# RUN: | FileCheck -check-prefixes=CHECK-EXPAND,CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+experimental-zca < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zca -d -M no-aliases - \ +# RUN: | FileCheck -check-prefixes=CHECK-EXPAND,CHECK-INST %s + +# The following check prefixes are used in this test: +# CHECK-INST.....Match the canonical instr (tests alias to instr. mapping) +# CHECK-EXPAND...Match canonical instr. 
unconditionally (tests alias expansion) +# CHECK-INST: {{^}} + +# CHECK-EXPAND: c.li a0, 0 +li x10, 0 +# CHECK-EXPAND: c.li a0, 1 +li x10, 1 +# CHECK-EXPAND: c.li a0, -1 +li x10, -1 +# CHECK-EXPAND: addi a0, zero, 2047 +li x10, 2047 +# CHECK-EXPAND: addi a0, zero, -2047 +li x10, -2047 +# CHECK-EXPAND: c.lui a1, 1 +# CHECK-EXPAND: addiw a1, a1, -2048 +li x11, 2048 +# CHECK-EXPAND: addi a1, zero, -2048 +li x11, -2048 +# CHECK-EXPAND: c.lui a1, 1 +# CHECK-EXPAND: addiw a1, a1, -2047 +li x11, 2049 +# CHECK-EXPAND: c.lui a1, 1048575 +# CHECK-EXPAND: addiw a1, a1, 2047 +li x11, -2049 +# CHECK-EXPAND: c.lui a1, 1 +# CHECK-EXPAND: c.addiw a1, -1 +li x11, 4095 +# CHECK-EXPAND: lui a1, 1048575 +# CHECK-EXPAND: c.addiw a1, 1 +li x11, -4095 +# CHECK-EXPAND: c.lui a2, 1 +li x12, 4096 +# CHECK-EXPAND: lui a2, 1048575 +li x12, -4096 +# CHECK-EXPAND: c.lui a2, 1 +# CHECK-EXPAND: c.addiw a2, 1 +li x12, 4097 +# CHECK-EXPAND: lui a2, 1048575 +# CHECK-EXPAND: c.addiw a2, -1 +li x12, -4097 +# CHECK-EXPAND: lui a2, 524288 +# CHECK-EXPAND: c.addiw a2, -1 +li x12, 2147483647 +# CHECK-EXPAND: lui a2, 524288 +# CHECK-EXPAND: c.addiw a2, 1 +li x12, -2147483647 +# CHECK-EXPAND: lui a2, 524288 +li x12, -2147483648 +# CHECK-EXPAND: lui a2, 524288 +li x12, -0x80000000 + +# CHECK-EXPAND: c.li a2, 1 +# CHECK-EXPAND: c.slli a2, 31 +li x12, 0x80000000 +# CHECK-EXPAND: c.li a2, -1 +# CHECK-EXPAND: c.srli a2, 32 +li x12, 0xFFFFFFFF + +# CHECK-EXPAND: c.li t0, 1 +# CHECK-EXPAND: c.slli t0, 32 +li t0, 0x100000000 +# CHECK-EXPAND: c.li t1, -1 +# CHECK-EXPAND: c.slli t1, 63 +li t1, 0x8000000000000000 +# CHECK-EXPAND: c.li t1, -1 +# CHECK-EXPAND: c.slli t1, 63 +li t1, -0x8000000000000000 +# CHECK-EXPAND: lui t2, 9321 +# CHECK-EXPAND: addiw t2, t2, -1329 +# CHECK-EXPAND: c.slli t2, 35 +li t2, 0x1234567800000000 +# CHECK-EXPAND: c.li t3, 7 +# CHECK-EXPAND: c.slli t3, 36 +# CHECK-EXPAND: c.addi t3, 11 +# CHECK-EXPAND: c.slli t3, 24 +# CHECK-EXPAND: c.addi t3, 15 +li t3, 0x700000000B00000F +# CHECK-EXPAND: lui t4, 583 +# CHECK-EXPAND: addiw t4, t4, -1875 +# CHECK-EXPAND: c.slli t4, 14 +# CHECK-EXPAND: addi t4, t4, -947 +# CHECK-EXPAND: c.slli t4, 12 +# CHECK-EXPAND: addi t4, t4, 1511 +# CHECK-EXPAND: c.slli t4, 13 +# CHECK-EXPAND: addi t4, t4, -272 +li t4, 0x123456789abcdef0 +# CHECK-EXPAND: c.li t5, -1 +li t5, 0xFFFFFFFFFFFFFFFF + +# CHECK-EXPAND: c.ld s0, 0(s1) +c.ld x8, (x9) +# CHECK-EXPAND: c.sd s0, 0(s1) +c.sd x8, (x9) +# CHECK-EXPAND: c.ldsp s0, 0(sp) +c.ldsp x8, (x2) +# CHECK-EXPAND: c.sdsp s0, 0(sp) +c.sdsp x8, (x2) diff --git a/llvm/test/MC/RISCV/rv64zca-hints-valid.s b/llvm/test/MC/RISCV/rv64zca-hints-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zca-hints-valid.s @@ -0,0 +1,9 @@ +# RUN: llvm-mc %s -triple riscv64 -mattr=+experimental-zca -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zca < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zca -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: c.slli zero, 63 +# CHECK-ASM: encoding: [0x7e,0x10] +c.slli x0, 63 diff --git a/llvm/test/MC/RISCV/rv64zca-invalid.s b/llvm/test/MC/RISCV/rv64zca-invalid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zca-invalid.s @@ -0,0 +1,32 @@ +# RUN: not llvm-mc -triple=riscv64 -mattr=+experimental-zca < %s 2>&1 | FileCheck %s + +## GPRC +c.ld ra, 4(sp) # CHECK: :[[@LINE]]:6: error: invalid operand for instruction +c.sd sp, 4(sp) 
# CHECK: :[[@LINE]]:6: error: invalid operand for instruction +c.addw a0, a7 # CHECK: :[[@LINE]]:14: error: invalid operand for instruction +c.subw a0, a6 # CHECK: :[[@LINE]]:14: error: invalid operand for instruction + +## GPRNoX0 +c.ldsp x0, 4(sp) # CHECK: :[[@LINE]]:9: error: invalid operand for instruction +c.ldsp zero, 4(sp) # CHECK: :[[@LINE]]:9: error: invalid operand for instruction + +# Out of range immediates + +## uimmlog2xlennonzero +c.slli t0, 64 # CHECK: :[[@LINE]]:12: error: immediate must be an integer in the range [1, 63] +c.srli a0, -1 # CHECK: :[[@LINE]]:12: error: immediate must be an integer in the range [1, 63] +c.srai a0, 0 # CHECK: :[[@LINE]]:12: error: immediate must be an integer in the range [1, 63] + +## simm6 +c.addiw t0, -33 # CHECK: :[[@LINE]]:13: error: immediate must be an integer in the range [-32, 31] +c.addiw t0, 32 # CHECK: :[[@LINE]]:13: error: immediate must be an integer in the range [-32, 31] +c.addiw t0, foo # CHECK: :[[@LINE]]:13: error: immediate must be an integer in the range [-32, 31] +c.addiw t0, %lo(foo) # CHECK: :[[@LINE]]:13: error: immediate must be an integer in the range [-32, 31] +c.addiw t0, %hi(foo) # CHECK: :[[@LINE]]:13: error: immediate must be an integer in the range [-32, 31] + +## uimm9_lsb000 +c.ldsp ra, 512(sp) # CHECK: :[[@LINE]]:13: error: immediate must be a multiple of 8 bytes in the range [0, 504] +c.sdsp ra, -8(sp) # CHECK: :[[@LINE]]:13: error: immediate must be a multiple of 8 bytes in the range [0, 504] +## uimm8_lsb000 +c.ld s0, -8(sp) # CHECK: :[[@LINE]]:11: error: immediate must be a multiple of 8 bytes in the range [0, 248] +c.sd s0, 256(sp) # CHECK: :[[@LINE]]:11: error: immediate must be a multiple of 8 bytes in the range [0, 248] diff --git a/llvm/test/MC/RISCV/rv64zca-valid.s b/llvm/test/MC/RISCV/rv64zca-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zca-valid.s @@ -0,0 +1,68 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zca -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+c < %s \ +# RUN: | llvm-objdump --mattr=+c -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# +# +# RUN: not llvm-mc -triple riscv64 \ +# RUN: -riscv-no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s +# RUN: not llvm-mc -triple riscv32 -mattr=+c \ +# RUN: -riscv-no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-NO-RV64 %s + +# TODO: more exhaustive testing of immediate encoding. 
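The signed immediates below follow the same hand-checkable pattern; a minimal CI-format sketch for c.addiw (encodeCAddiw is invented for illustration), whose two's-complement simm6 field is what the [-32, 31] diagnostics in rv64zca-invalid.s above enforce:

#include <cassert>
#include <cstdint>

// c.addiw (RV64 only): funct3=001 at [15:13], imm[5] at bit 12,
// rd at [11:7], imm[4:0] at [6:2], quadrant 0b01.
static uint16_t encodeCAddiw(unsigned Rd, int Imm) {
  assert(Imm >= -32 && Imm <= 31 && "simm6");
  unsigned U = (unsigned)Imm & 0x3f; // two's-complement into 6 bits
  return (0b001u << 13) | ((U >> 5) << 12) | (Rd << 7) |
         ((U & 0x1f) << 2) | 0b01;
}

int main() {
  assert(encodeCAddiw(/*a3=*/13, -32) == 0x3681); // bytes [0x81,0x36]
  assert(encodeCAddiw(/*a3=*/13, 31) == 0x26fd);  // bytes [0xfd,0x26]
  return 0;
}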
+ +# CHECK-ASM-AND-OBJ: c.ldsp ra, 0(sp) +# CHECK-ASM: encoding: [0x82,0x60] +# CHECK-NO-EXT: error: instruction requires the following: 'C' (Compressed Instructions) +# CHECK-NO-RV64: error: instruction requires the following: RV64I Base Instruction Set +c.ldsp ra, 0(sp) +# CHECK-ASM-AND-OBJ: c.sdsp ra, 504(sp) +# CHECK-ASM: encoding: [0x86,0xff] +# CHECK-NO-EXT: error: instruction requires the following: 'C' (Compressed Instructions) +# CHECK-NO-RV64: error: instruction requires the following: RV64I Base Instruction Set +c.sdsp ra, 504(sp) +# CHECK-ASM-AND-OBJ: c.ld a4, 0(a3) +# CHECK-ASM: encoding: [0x98,0x62] +# CHECK-NO-EXT: error: instruction requires the following: 'C' (Compressed Instructions) +# CHECK-NO-RV64: error: instruction requires the following: RV64I Base Instruction Set +c.ld a4, 0(a3) +# CHECK-ASM-AND-OBJ: c.sd a5, 248(a3) +# CHECK-ASM: encoding: [0xfc,0xfe] +# CHECK-NO-EXT: error: instruction requires the following: 'C' (Compressed Instructions) +# CHECK-NO-RV64: error: instruction requires the following: RV64I Base Instruction Set +c.sd a5, 248(a3) + +# CHECK-ASM-AND-OBJ: c.subw a3, a4 +# CHECK-ASM: encoding: [0x99,0x9e] +c.subw a3, a4 +# CHECK-ASM-AND-OBJ: c.addw a0, a2 +# CHECK-ASM: encoding: [0x31,0x9d] +# CHECK-NO-EXT: error: instruction requires the following: 'C' (Compressed Instructions) +# CHECK-NO-RV64: error: instruction requires the following: RV64I Base Instruction Set +c.addw a0, a2 + +# CHECK-ASM-AND-OBJ: c.addiw a3, -32 +# CHECK-ASM: encoding: [0x81,0x36] +# CHECK-NO-EXT: error: instruction requires the following: 'C' (Compressed Instructions) +# CHECK-NO-RV64: error: instruction requires the following: RV64I Base Instruction Set +c.addiw a3, -32 +# CHECK-ASM-AND-OBJ: c.addiw a3, 31 +# CHECK-ASM: encoding: [0xfd,0x26] +# CHECK-NO-EXT: error: instruction requires the following: 'C' (Compressed Instructions) +# CHECK-NO-RV64: error: instruction requires the following: RV64I Base Instruction Set +c.addiw a3, 31 + +# CHECK-ASM-AND-OBJ: c.slli s0, 1 +# CHECK-ASM: encoding: [0x06,0x04] +# CHECK-NO-EXT: error: instruction requires the following: 'C' (Compressed Instructions) +# CHECK-NO-RV64: error: instruction requires the following: RV64I Base Instruction Set +c.slli s0, 1 +# CHECK-ASM-AND-OBJ: c.srli a3, 63 +# CHECK-ASM: encoding: [0xfd,0x92] +c.srli a3, 63 +# CHECK-ASM-AND-OBJ: c.srai a2, 63 +# CHECK-ASM: encoding: [0x7d,0x96] +c.srai a2, 63 diff --git a/llvm/test/MC/RISCV/rv64zcb-Invalid.s b/llvm/test/MC/RISCV/rv64zcb-Invalid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zcb-Invalid.s @@ -0,0 +1,17 @@ +# RUN: not llvm-mc -triple=riscv64 -mattr=experimental-zcb -riscv-no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-ERROR %s + +# CHECK-ERROR: error: immediate must be an integer in the range [0, 3] +c.lbu a5, 10(a4) + +# CHECK-ERROR: error: immediate must be one of [0, 2] +c.lhu a5, 10(a4) + +# CHECK-ERROR: error: immediate must be one of [0, 2] +c.lh a5, 10(a4) + +# CHECK-ERROR: error: immediate must be an integer in the range [0, 3] +c.sb a5, 10(a4) + +# CHECK-ERROR: error: immediate must be one of [0, 2] +c.sh a5, 10(a4) \ No newline at end of file diff --git a/llvm/test/MC/RISCV/rv64zcb-valid.s b/llvm/test/MC/RISCV/rv64zcb-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zcb-valid.s @@ -0,0 +1,93 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=experimental-zcb -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc 
-filetype=obj -triple=riscv64 -mattr=experimental-zcb -mattr=m < %s \ +# RUN: | llvm-objdump --mattr=experimental-zcb --mattr=m -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: c.zext.b s0 +# CHECK-ASM: encoding: [0x61,0x9c] +c.zext.b s0 + +# CHECK-ASM-AND-OBJ: c.sext.b s0 +# CHECK-ASM: encoding: [0x65,0x9c] +c.sext.b s0 + +# CHECK-ASM-AND-OBJ: c.zext.h s0 +# CHECK-ASM: encoding: [0x69,0x9c] +c.zext.h s0 + +# CHECK-ASM-AND-OBJ: c.sext.h s0 +# CHECK-ASM: encoding: [0x6d,0x9c] +c.sext.h s0 + +# CHECK-ASM-AND-OBJ: c.zext.w s0 +# CHECK-ASM: encoding: [0x71,0x9c] +c.zext.w s0 + +# CHECK-ASM-AND-OBJ: c.not s0 +# CHECK-ASM: encoding: [0x75,0x9c] +c.not s0 + +# CHECK-ASM-AND-OBJ: c.mul s0, s1 +# CHECK-ASM: encoding: [0x45,0x9c] +c.mul s0, s1 + +# CHECK-ASM-AND-OBJ: c.lbu a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x83] +c.lbu a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.lhu a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x87] +c.lhu a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.lh a5, 2(a4) +# CHECK-ASM: encoding: [0x7c,0x87] +c.lh a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.sb a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x8b] +c.sb a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.sh a5, 2(a4) +# CHECK-ASM: encoding: [0x7c,0x8f] +c.sh a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.lh a5, 2(a4) +# CHECK-ASM: encoding: [0x7c,0x87] +lh a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.lbu a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x83] +lbu a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.lhu a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x87] +lhu a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.sb a5, 2(a4) +# CHECK-ASM: encoding: [0x3c,0x8b] +sb a5, 2(a4) + +# CHECK-ASM-AND-OBJ: c.sh a5, 2(a4) +# CHECK-ASM: encoding: [0x7c,0x8f] +sh a5, 2(a4) + +# CHECK-ASM-AND-OBJ: lh a5, 3(a4) +# CHECK-ASM: encoding: [0x83,0x17,0x37,0x00] +lh a5, 3(a4) + +# CHECK-ASM-AND-OBJ: lbu a5, 4(a4) +# CHECK-ASM: encoding: [0x83,0x47,0x47,0x00] +lbu a5, 4(a4) + +# CHECK-ASM-AND-OBJ: lhu a5, 3(a4) +# CHECK-ASM: encoding: [0x83,0x57,0x37,0x00] +lhu a5, 3(a4) + +# CHECK-ASM-AND-OBJ: sb a5, 4(a4) +# CHECK-ASM: encoding: [0x23,0x02,0xf7,0x00] +sb a5, 4(a4) + +# CHECK-ASM-AND-OBJ: sh a5, 3(a4) +# CHECK-ASM: encoding: [0xa3,0x11,0xf7,0x00] +sh a5, 3(a4) \ No newline at end of file diff --git a/llvm/test/MC/RISCV/rv64zcmp-Invalid.s b/llvm/test/MC/RISCV/rv64zcmp-Invalid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zcmp-Invalid.s @@ -0,0 +1,14 @@ +# RUN: not llvm-mc -triple=riscv64 -mattr=experimental-zcmp -riscv-no-aliases -show-encoding < %s 2>&1 \ +# RUN: | FileCheck -check-prefixes=CHECK-ERROR %s + +# CHECK-ERROR: error: invalid operand for instruction +cm.mvsa01 a1, a2 + +# CHECK-ERROR: error: invalid operand for instruction +cm.mva01s a1, a2 + +# CHECK-ERROR: error: invalid register list, {ra, s0-s10} is not supported. +cm.popretz {ra, s0-s10}, 112 + +# CHECK-ERROR: error: This stack adjustment is invalid for this instruction and register list, Please refer to Zce spec for a detailed range of stack adjustment. 
+cm.popretz {ra, s0-s1}, 112 \ No newline at end of file diff --git a/llvm/test/MC/RISCV/rv64zcmp-valid.s b/llvm/test/MC/RISCV/rv64zcmp-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zcmp-valid.s @@ -0,0 +1,149 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=experimental-zcmp -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=experimental-zcmp < %s \ +# RUN: | llvm-objdump --mattr=-c,experimental-zcmp -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: cm.mvsa01 s1, s0 +# CHECK-ASM: encoding: [0xa2,0xac] +cm.mvsa01 s1, s0 + +# CHECK-ASM-AND-OBJ: cm.mva01s s1, s0 +# CHECK-ASM: encoding: [0xe2,0xac] +cm.mva01s s1, s0 + +# CHECK-ASM-AND-OBJ: cm.popret {ra}, 16 +# CHECK-ASM: encoding: [0x42,0xbe] +cm.popret {ra}, 16 + +# CHECK-ASM-AND-OBJ: cm.popret {ra}, 32 +# CHECK-ASM: encoding: [0x46,0xbe] +cm.popret {ra}, 32 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0}, 64 +# CHECK-ASM: encoding: [0x5e,0xbe] +cm.popret {ra, s0}, 64 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s1}, 32 +# CHECK-ASM: encoding: [0x62,0xbe] +cm.popret {ra,s0-s1}, 32 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s2}, 32 +# CHECK-ASM: encoding: [0x72,0xbe] +cm.popret {ra, s0-s2}, 32 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s3}, 64 +# CHECK-ASM: encoding: [0x86,0xbe] +cm.popret {ra, s0-s3}, 64 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s5}, 64 +# CHECK-ASM: encoding: [0xa2,0xbe] +cm.popret {ra, s0-s5}, 64 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s7}, 80 +# CHECK-ASM: encoding: [0xc2,0xbe] +cm.popret {ra, s0-s7}, 80 + +# CHECK-ASM-AND-OBJ: cm.popret {ra, s0-s11}, 112 +# CHECK-ASM: encoding: [0xf2,0xbe] +cm.popret {ra, s0-s11}, 112 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra}, 16 +# CHECK-ASM: encoding: [0x42,0xbc] +cm.popretz {ra}, 16 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra}, 32 +# CHECK-ASM: encoding: [0x46,0xbc] +cm.popretz {ra}, 32 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0}, 64 +# CHECK-ASM: encoding: [0x5e,0xbc] +cm.popretz {ra, s0}, 64 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s1}, 32 +# CHECK-ASM: encoding: [0x62,0xbc] +cm.popretz {ra, s0-s1}, 32 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s2}, 32 +# CHECK-ASM: encoding: [0x72,0xbc] +cm.popretz {ra, s0-s2}, 32 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s3}, 64 +# CHECK-ASM: encoding: [0x86,0xbc] +cm.popretz {ra, s0-s3}, 64 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s5}, 64 +# CHECK-ASM: encoding: [0xa2,0xbc] +cm.popretz {ra, s0-s5}, 64 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s7}, 80 +# CHECK-ASM: encoding: [0xc2,0xbc] +cm.popretz {ra, s0-s7}, 80 + +# CHECK-ASM-AND-OBJ: cm.popretz {ra, s0-s11}, 112 +# CHECK-ASM: encoding: [0xf2,0xbc] +cm.popretz {ra, s0-s11}, 112 + +# CHECK-ASM-AND-OBJ: cm.pop {ra}, 16 +# CHECK-ASM: encoding: [0x42,0xba] +cm.pop {ra}, 16 + +# CHECK-ASM-AND-OBJ: cm.pop {ra}, 32 +# CHECK-ASM: encoding: [0x46,0xba] +cm.pop {ra}, 32 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0}, 16 +# CHECK-ASM: encoding: [0x52,0xba] +cm.pop {ra, s0}, 16 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s1}, 32 +# CHECK-ASM: encoding: [0x62,0xba] +cm.pop {ra, s0-s1}, 32 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s2}, 32 +# CHECK-ASM: encoding: [0x72,0xba] +cm.pop {ra, s0-s2}, 32 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s5}, 64 +# CHECK-ASM: encoding: [0xa2,0xba] +cm.pop {ra, s0-s5}, 64 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s7}, 80 +# CHECK-ASM: encoding: [0xc2,0xba] +cm.pop {ra, s0-s7}, 80 + +# CHECK-ASM-AND-OBJ: cm.pop {ra, s0-s11}, 
112 +# CHECK-ASM: encoding: [0xf2,0xba] +cm.pop {ra, s0-s11}, 112 + +# CHECK-ASM-AND-OBJ: cm.push {ra}, -16 +# CHECK-ASM: encoding: [0x42,0xb8] +cm.push {ra}, -16 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0}, -32 +# CHECK-ASM: encoding: [0x56,0xb8] +cm.push {ra, s0}, -32 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s1}, -32 +# CHECK-ASM: encoding: [0x62,0xb8] +cm.push {ra, s0-s1}, -32 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s3}, -64 +# CHECK-ASM: encoding: [0x86,0xb8] +cm.push {ra, s0-s3}, -64 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s7}, -80 +# CHECK-ASM: encoding: [0xc2,0xb8] +cm.push {ra, s0-s7}, -80 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s11}, -112 +# CHECK-ASM: encoding: [0xf2,0xb8] +cm.push {ra, s0-s11}, -112 + +# CHECK-ASM-AND-OBJ: cm.push {ra, s0-s11}, -128 +# CHECK-ASM: encoding: [0xf6,0xb8] +cm.push {ra, s0-s11}, -128 \ No newline at end of file
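The rv64 push/pop stanzas above follow the same rule sketched in rv32zcmp-valid.s, just with 8-byte register slots; a small consistency check under that assumption:

#include <cassert>

int main() {
  // {ra, s0-s11} is 13 registers * 8 bytes = 104, rounded up to a
  // 112-byte base; cm.push encodes the magnitude of the negative
  // adjustment, so -112 is spimm=0 ([0xf2,0xb8]) and -128 is
  // spimm=1 ([0xf6,0xb8]).
  unsigned Base = (13 * 8 + 15) / 16 * 16;
  assert(Base == 112);
  assert((112 - Base) / 16 == 0);
  assert((128 - Base) / 16 == 1);
  return 0;
}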