diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index ad0e27d5c24b..3b0e461842c6 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -1,260 +1,260 @@ //===-- RISCVMCCodeEmitter.cpp - Convert RISCV code to machine code -------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the RISCVMCCodeEmitter class. // //===----------------------------------------------------------------------===// #include "MCTargetDesc/RISCVBaseInfo.h" #include "MCTargetDesc/RISCVFixupKinds.h" #include "MCTargetDesc/RISCVMCExpr.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "mccodeemitter" STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); STATISTIC(MCNumFixups, "Number of MC fixups created"); namespace { class RISCVMCCodeEmitter : public MCCodeEmitter { RISCVMCCodeEmitter(const RISCVMCCodeEmitter &) = delete; void operator=(const RISCVMCCodeEmitter &) = delete; MCContext &Ctx; MCInstrInfo const &MCII; public: RISCVMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII) : Ctx(ctx), MCII(MCII) {} ~RISCVMCCodeEmitter() override {} void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const override; void expandFunctionCall(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; /// TableGen'erated function for getting the binary encoding for an /// instruction. uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; /// Return binary encoding of operand. If the machine operand requires /// relocation, record the relocation and return zero. unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; unsigned getImmOpValueAsr1(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; unsigned getImmOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; }; } // end anonymous namespace MCCodeEmitter *llvm::createRISCVMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx) { return new RISCVMCCodeEmitter(Ctx, MCII); } -// Expand PseudoCALL to AUIPC and JALR with relocation types. -// We expand PseudoCALL while encoding, meaning AUIPC and JALR won't go through -// RISCV MC to MC compressed instruction transformation. This is acceptable -// because AUIPC has no 16-bit form and C_JALR have no immediate operand field. -// We let linker relaxation deal with it. When linker relaxation enabled, -// AUIPC and JALR have chance relax to JAL. If C extension is enabled, -// JAL has chance relax to C_JAL. 
+// Expand PseudoCALL and PseudoTAIL to AUIPC and JALR with relocation types. +// We expand PseudoCALL and PseudoTAIL while encoding, meaning AUIPC and JALR +// won't go through the RISCV MC to MC compressed instruction transformation. +// This is acceptable because AUIPC has no 16-bit form and C_JALR has no +// immediate operand field. We let linker relaxation deal with it: when linker +// relaxation is enabled, AUIPC and JALR may relax to JAL, and if the C +// extension is enabled, JAL may in turn relax to C_JAL. void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { MCInst TmpInst; MCOperand Func = MI.getOperand(0); unsigned Ra = (MI.getOpcode() == RISCV::PseudoTAIL) ? RISCV::X6 : RISCV::X1; uint32_t Binary; assert(Func.isExpr() && "Expected expression"); const MCExpr *Expr = Func.getExpr(); // Create function call expression CallExpr for AUIPC. const MCExpr *CallExpr = RISCVMCExpr::create(Expr, RISCVMCExpr::VK_RISCV_CALL, Ctx); // Emit AUIPC Ra, Func with R_RISCV_CALL relocation type. TmpInst = MCInstBuilder(RISCV::AUIPC) .addReg(Ra) .addOperand(MCOperand::createExpr(CallExpr)); Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); support::endian::write(OS, Binary, support::little); // Emit JALR Ra, Ra, 0 TmpInst = MCInstBuilder(RISCV::JALR).addReg(Ra).addReg(Ra).addImm(0); Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); support::endian::write(OS, Binary, support::little); } void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); // Get byte count of instruction. unsigned Size = Desc.getSize(); if (MI.getOpcode() == RISCV::PseudoCALL || MI.getOpcode() == RISCV::PseudoTAIL) { expandFunctionCall(MI, OS, Fixups, STI); MCNumEmitted += 2; return; } switch (Size) { default: llvm_unreachable("Unhandled encodeInstruction length!"); case 2: { uint16_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); support::endian::write(OS, Bits, support::little); break; } case 4: { uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); support::endian::write(OS, Bits, support::little); break; } } ++MCNumEmitted; // Keep track of the # of mi's emitted.
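// [Editor's sketch, not part of the patch] The AUIPC+JALR pair emitted by
// expandFunctionCall reaches any +/-2 GiB PC-relative target by splitting the
// offset into a 20-bit high part and a 12-bit low part. Because JALR
// sign-extends its 12-bit immediate, the high part must be rounded up by
// 0x800 whenever the low part would be negative. A standalone illustration
// (assumes two's complement and arithmetic right shift):
#include <cassert>
#include <cstdint>

void splitCallOffset(int64_t Offset, int64_t &Hi20, int64_t &Lo12) {
  Hi20 = (Offset + 0x800) >> 12; // AUIPC immediate (upper 20 bits)
  Lo12 = Offset - (Hi20 << 12);  // JALR immediate, always in [-2048, 2047]
  assert(Lo12 >= -2048 && Lo12 <= 2047 && "low part must fit the I-type field");
  assert((Hi20 << 12) + Lo12 == Offset && "split must round-trip");
}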
} unsigned RISCVMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { if (MO.isReg()) return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); if (MO.isImm()) return static_cast(MO.getImm()); llvm_unreachable("Unhandled expression!"); return 0; } unsigned RISCVMCCodeEmitter::getImmOpValueAsr1(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); if (MO.isImm()) { unsigned Res = MO.getImm(); assert((Res & 1) == 0 && "LSB is non-zero"); return Res >> 1; } return getImmOpValue(MI, OpNo, Fixups, STI); } unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); MCInstrDesc const &Desc = MCII.get(MI.getOpcode()); unsigned MIFrm = Desc.TSFlags & RISCVII::InstFormatMask; // If the destination is an immediate, there is nothing to do if (MO.isImm()) return MO.getImm(); assert(MO.isExpr() && "getImmOpValue expects only expressions or immediates"); const MCExpr *Expr = MO.getExpr(); MCExpr::ExprKind Kind = Expr->getKind(); RISCV::Fixups FixupKind = RISCV::fixup_riscv_invalid; if (Kind == MCExpr::Target) { const RISCVMCExpr *RVExpr = cast(Expr); switch (RVExpr->getKind()) { case RISCVMCExpr::VK_RISCV_None: case RISCVMCExpr::VK_RISCV_Invalid: llvm_unreachable("Unhandled fixup kind!"); case RISCVMCExpr::VK_RISCV_LO: if (MIFrm == RISCVII::InstFormatI) FixupKind = RISCV::fixup_riscv_lo12_i; else if (MIFrm == RISCVII::InstFormatS) FixupKind = RISCV::fixup_riscv_lo12_s; else llvm_unreachable("VK_RISCV_LO used with unexpected instruction format"); break; case RISCVMCExpr::VK_RISCV_HI: FixupKind = RISCV::fixup_riscv_hi20; break; case RISCVMCExpr::VK_RISCV_PCREL_LO: if (MIFrm == RISCVII::InstFormatI) FixupKind = RISCV::fixup_riscv_pcrel_lo12_i; else if (MIFrm == RISCVII::InstFormatS) FixupKind = RISCV::fixup_riscv_pcrel_lo12_s; else llvm_unreachable( "VK_RISCV_PCREL_LO used with unexpected instruction format"); break; case RISCVMCExpr::VK_RISCV_PCREL_HI: FixupKind = RISCV::fixup_riscv_pcrel_hi20; break; case RISCVMCExpr::VK_RISCV_CALL: FixupKind = RISCV::fixup_riscv_call; break; } } else if (Kind == MCExpr::SymbolRef && cast(Expr)->getKind() == MCSymbolRefExpr::VK_None) { if (Desc.getOpcode() == RISCV::JAL) { FixupKind = RISCV::fixup_riscv_jal; } else if (MIFrm == RISCVII::InstFormatB) { FixupKind = RISCV::fixup_riscv_branch; } else if (MIFrm == RISCVII::InstFormatCJ) { FixupKind = RISCV::fixup_riscv_rvc_jump; } else if (MIFrm == RISCVII::InstFormatCB) { FixupKind = RISCV::fixup_riscv_rvc_branch; } } assert(FixupKind != RISCV::fixup_riscv_invalid && "Unhandled expression!"); Fixups.push_back( MCFixup::create(0, Expr, MCFixupKind(FixupKind), MI.getLoc())); ++MCNumFixups; return 0; } #include "RISCVGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d3d0d0370b9f..f57261434648 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1,1448 +1,1557 @@ //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
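// [Editor's sketch, not part of the patch] getImmOpValueAsr1 above encodes
// the ISA convention that branch and jump byte offsets are always even
// (instruction addresses are at least 2-byte aligned, even with the C
// extension), so the known-zero LSB is dropped from the encoding. Mirroring
// the source's unsigned arithmetic:
#include <cassert>
#include <cstdint>

uint32_t encodeBranchOffset(uint32_t ByteOffset) {
  assert((ByteOffset & 1) == 0 && "LSB must be zero");
  return ByteOffset >> 1; // the hardware re-appends the zero LSB on decode
}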
// //===----------------------------------------------------------------------===// // // This file defines the interfaces that RISCV uses to lower LLVM code into a // selection DAG. // //===----------------------------------------------------------------------===// #include "RISCVISelLowering.h" #include "RISCV.h" #include "RISCVMachineFunctionInfo.h" #include "RISCVRegisterInfo.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "riscv-lower" +STATISTIC(NumTailCalls, "Number of tail calls"); + RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { MVT XLenVT = Subtarget.getXLenVT(); // Set up the register classes. addRegisterClass(XLenVT, &RISCV::GPRRegClass); if (Subtarget.hasStdExtF()) addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); if (Subtarget.hasStdExtD()) addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); // Compute derived properties from the register classes. computeRegisterProperties(STI.getRegisterInfo()); setStackPointerRegisterToSaveRestore(RISCV::X2); for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) setLoadExtAction(N, XLenVT, MVT::i1, Promote); // TODO: add all necessary setOperationAction calls. 
setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BR_CC, XLenVT, Expand); setOperationAction(ISD::SELECT, XLenVT, Custom); setOperationAction(ISD::SELECT_CC, XLenVT, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); for (auto VT : {MVT::i1, MVT::i8, MVT::i16}) setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); setOperationAction(ISD::ADDC, XLenVT, Expand); setOperationAction(ISD::ADDE, XLenVT, Expand); setOperationAction(ISD::SUBC, XLenVT, Expand); setOperationAction(ISD::SUBE, XLenVT, Expand); if (!Subtarget.hasStdExtM()) { setOperationAction(ISD::MUL, XLenVT, Expand); setOperationAction(ISD::MULHS, XLenVT, Expand); setOperationAction(ISD::MULHU, XLenVT, Expand); setOperationAction(ISD::SDIV, XLenVT, Expand); setOperationAction(ISD::UDIV, XLenVT, Expand); setOperationAction(ISD::SREM, XLenVT, Expand); setOperationAction(ISD::UREM, XLenVT, Expand); } setOperationAction(ISD::SDIVREM, XLenVT, Expand); setOperationAction(ISD::UDIVREM, XLenVT, Expand); setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); setOperationAction(ISD::SHL_PARTS, XLenVT, Expand); setOperationAction(ISD::SRL_PARTS, XLenVT, Expand); setOperationAction(ISD::SRA_PARTS, XLenVT, Expand); setOperationAction(ISD::ROTL, XLenVT, Expand); setOperationAction(ISD::ROTR, XLenVT, Expand); setOperationAction(ISD::BSWAP, XLenVT, Expand); setOperationAction(ISD::CTTZ, XLenVT, Expand); setOperationAction(ISD::CTLZ, XLenVT, Expand); setOperationAction(ISD::CTPOP, XLenVT, Expand); ISD::CondCode FPCCToExtend[] = { ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO, ISD::SETUEQ, ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, ISD::SETGE, ISD::SETNE}; if (Subtarget.hasStdExtF()) { setOperationAction(ISD::FMINNUM, MVT::f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); for (auto CC : FPCCToExtend) setCondCodeAction(CC, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Expand); } if (Subtarget.hasStdExtD()) { setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); for (auto CC : FPCCToExtend) setCondCodeAction(CC, MVT::f64, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); } setOperationAction(ISD::GlobalAddress, XLenVT, Custom); setOperationAction(ISD::BlockAddress, XLenVT, Custom); setOperationAction(ISD::ConstantPool, XLenVT, Custom); setBooleanContents(ZeroOrOneBooleanContent); // Function alignments (log2). unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2; setMinFunctionAlignment(FunctionAlignment); setPrefFunctionAlignment(FunctionAlignment); // Effectively disable jump table generation. 
setMinimumJumpTableEntries(INT_MAX); } EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) return getPointerTy(DL); return VT.changeVectorElementTypeToInteger(); } bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { // No global is ever allowed as a base. if (AM.BaseGV) return false; // Require a 12-bit signed offset. if (!isInt<12>(AM.BaseOffs)) return false; switch (AM.Scale) { case 0: // "r+i" or just "i", depending on HasBaseReg. break; case 1: if (!AM.HasBaseReg) // allow "r+i". break; return false; // disallow "r+r" or "r+r+i". default: return false; } return true; } bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { return isInt<12>(Imm); } bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { return isInt<12>(Imm); } // On RV32, 64-bit integers are split into their high and low parts and held // in two different registers, so the trunc is free since the low register can // just be used. bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) return false; unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); unsigned DestBits = DstTy->getPrimitiveSizeInBits(); return (SrcBits == 64 && DestBits == 32); } bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || !DstVT.isInteger()) return false; unsigned SrcBits = SrcVT.getSizeInBits(); unsigned DestBits = DstVT.getSizeInBits(); return (SrcBits == 64 && DestBits == 32); } bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { // Zexts are free if they can be combined with a load. if (auto *LD = dyn_cast(Val)) { EVT MemVT = LD->getMemoryVT(); if ((MemVT == MVT::i8 || MemVT == MVT::i16 || (Subtarget.is64Bit() && MemVT == MVT::i32)) && (LD->getExtensionType() == ISD::NON_EXTLOAD || LD->getExtensionType() == ISD::ZEXTLOAD)) return true; } return TargetLowering::isZExtFree(Val, VT2); } // Changes the condition code and swaps operands if necessary, so the SetCC // operation matches one of the comparisons supported directly in the RISC-V // ISA. static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { switch (CC) { default: break; case ISD::SETGT: case ISD::SETLE: case ISD::SETUGT: case ISD::SETULE: CC = ISD::getSetCCSwappedOperands(CC); std::swap(LHS, RHS); break; } } // Return the RISC-V branch opcode that matches the given DAG integer // condition code. The CondCode must be one of those supported by the RISC-V // ISA (see normaliseSetCC). 
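// [Editor's sketch, not part of the patch] isLegalAddressingMode,
// isLegalICmpImmediate and isLegalAddImmediate above all hinge on the same
// fact: RISC-V I-type instructions carry a 12-bit signed immediate, i.e.
// values in [-2048, 2047]. An equivalent standalone predicate:
#include <cstdint>

bool fitsRVImm12(int64_t Imm) { return Imm >= -2048 && Imm <= 2047; }
// e.g. "addi a0, a0, 2047" and "lw a0, -2048(a1)" are encodable, while an
// offset of 2048 must first be materialised with lui/add.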
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unsupported CondCode"); case ISD::SETEQ: return RISCV::BEQ; case ISD::SETNE: return RISCV::BNE; case ISD::SETLT: return RISCV::BLT; case ISD::SETGE: return RISCV::BGE; case ISD::SETULT: return RISCV::BLTU; case ISD::SETUGE: return RISCV::BGEU; } } SDValue RISCVTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: report_fatal_error("unimplemented operand"); case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG); case ISD::BlockAddress: return lowerBlockAddress(Op, DAG); case ISD::ConstantPool: return lowerConstantPool(Op, DAG); case ISD::SELECT: return lowerSELECT(Op, DAG); case ISD::VASTART: return lowerVASTART(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); } } SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT Ty = Op.getValueType(); GlobalAddressSDNode *N = cast(Op); const GlobalValue *GV = N->getGlobal(); int64_t Offset = N->getOffset(); MVT XLenVT = Subtarget.getXLenVT(); if (isPositionIndependent() || Subtarget.is64Bit()) report_fatal_error("Unable to lowerGlobalAddress"); // In order to maximise the opportunity for common subexpression elimination, // emit a separate ADD node for the global address offset instead of folding // it in the global address node. Later peephole optimisations may choose to // fold it back in when profitable. SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI); SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO); SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0); SDValue MNLo = SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0); if (Offset != 0) return DAG.getNode(ISD::ADD, DL, Ty, MNLo, DAG.getConstant(Offset, DL, XLenVT)); return MNLo; } SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT Ty = Op.getValueType(); BlockAddressSDNode *N = cast(Op); const BlockAddress *BA = N->getBlockAddress(); int64_t Offset = N->getOffset(); if (isPositionIndependent() || Subtarget.is64Bit()) report_fatal_error("Unable to lowerBlockAddress"); SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI); SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO); SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0); SDValue MNLo = SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0); return MNLo; } SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT Ty = Op.getValueType(); ConstantPoolSDNode *N = cast(Op); const Constant *CPA = N->getConstVal(); int64_t Offset = N->getOffset(); unsigned Alignment = N->getAlignment(); if (!isPositionIndependent()) { SDValue CPAHi = DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI); SDValue CPALo = DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO); SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0); SDValue MNLo = SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0); return MNLo; } else { report_fatal_error("Unable to lowerConstantPool"); } } SDValue RISCVTargetLowering::lowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT Ty = Op.getValueType(); ExternalSymbolSDNode *N = cast(Op); const char *Sym = N->getSymbol(); // TODO: should 
also handle gp-relative loads. if (isPositionIndependent() || Subtarget.is64Bit()) report_fatal_error("Unable to lowerExternalSymbol"); SDValue GAHi = DAG.getTargetExternalSymbol(Sym, Ty, RISCVII::MO_HI); SDValue GALo = DAG.getTargetExternalSymbol(Sym, Ty, RISCVII::MO_LO); SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0); SDValue MNLo = SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0); return MNLo; } SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue CondV = Op.getOperand(0); SDValue TrueV = Op.getOperand(1); SDValue FalseV = Op.getOperand(2); SDLoc DL(Op); MVT XLenVT = Subtarget.getXLenVT(); // If the result type is XLenVT and CondV is the output of a SETCC node // which also operated on XLenVT inputs, then merge the SETCC node into the // lowered RISCVISD::SELECT_CC to take advantage of the integer // compare+branch instructions. i.e.: // (select (setcc lhs, rhs, cc), truev, falsev) // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && CondV.getOperand(0).getSimpleValueType() == XLenVT) { SDValue LHS = CondV.getOperand(0); SDValue RHS = CondV.getOperand(1); auto CC = cast(CondV.getOperand(2)); ISD::CondCode CCVal = CC->get(); normaliseSetCC(LHS, RHS, CCVal); SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); } // Otherwise: // (select condv, truev, falsev) // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) SDValue Zero = DAG.getConstant(0, DL, XLenVT); SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); } SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); RISCVMachineFunctionInfo *FuncInfo = MF.getInfo(); SDLoc DL(Op); SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy(MF.getDataLayout())); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
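// [Editor's sketch, not part of the patch] lowerGlobalAddress above
// deliberately keeps the constant offset in a separate ADD node so that two
// accesses to the same global can share one LUI/ADDI materialisation of the
// base address. Source-level view (hypothetical global g):
#include <cstdint>

struct Fields { int32_t a, b; };
extern Fields g;

int32_t sumFields() {
  // Both loads can share the lui %hi(g) / addi %lo(g) base; only the 12-bit
  // load offsets (+0 and +4) differ, and peepholes may fold them back in.
  return g.a + g.b;
}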
const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), MachinePointerInfo(SV)); } SDValue RISCVTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setFrameAddressIsTaken(true); unsigned FrameReg = RI.getFrameRegister(MF); int XLenInBytes = Subtarget.getXLen() / 8; EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); while (Depth--) { int Offset = -(XLenInBytes * 2); SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, DAG.getIntPtrConstant(Offset, DL)); FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); } return FrameAddr; } SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setReturnAddressIsTaken(true); MVT XLenVT = Subtarget.getXLenVT(); int XLenInBytes = Subtarget.getXLen() / 8; if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); EVT VT = Op.getValueType(); SDLoc DL(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); if (Depth) { int Off = -XLenInBytes; SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(Off, DL, VT); return DAG.getLoad(VT, DL, DAG.getEntryNode(), DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo()); } // Return the value of the return address register, marking it an implicit // live-in. unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); } static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB) { assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); MachineFunction &MF = *BB->getParent(); DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); unsigned LoReg = MI.getOperand(0).getReg(); unsigned HiReg = MI.getOperand(1).getReg(); unsigned SrcReg = MI.getOperand(2).getReg(); const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; int FI = MF.getInfo()->getMoveF64FrameIndex(); TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, RI); MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, 8, 8); BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) .addFrameIndex(FI) .addImm(0) .addMemOperand(MMO); BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) .addFrameIndex(FI) .addImm(4) .addMemOperand(MMO); MI.eraseFromParent(); // The pseudo instruction is gone now. 
return BB; } static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB) { assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && "Unexpected instruction"); MachineFunction &MF = *BB->getParent(); DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); unsigned DstReg = MI.getOperand(0).getReg(); unsigned LoReg = MI.getOperand(1).getReg(); unsigned HiReg = MI.getOperand(2).getReg(); const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; int FI = MF.getInfo()->getMoveF64FrameIndex(); MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, 8, 8); BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) .addFrameIndex(FI) .addImm(0) .addMemOperand(MMO); BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) .addFrameIndex(FI) .addImm(4) .addMemOperand(MMO); TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } MachineBasicBlock * RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); case RISCV::Select_GPR_Using_CC_GPR: case RISCV::Select_FPR32_Using_CC_GPR: case RISCV::Select_FPR64_Using_CC_GPR: break; case RISCV::BuildPairF64Pseudo: return emitBuildPairF64Pseudo(MI, BB); case RISCV::SplitF64Pseudo: return emitSplitF64Pseudo(MI, BB); } // To "insert" a SELECT instruction, we actually have to insert the triangle // control-flow pattern. The incoming instruction knows the destination vreg // to set, the condition code register to branch on, the true/false values to // select between, and the condcode to use to select the appropriate branch. // // We produce the following control flow: // HeadMBB // | \ // | IfFalseMBB // | / // TailMBB const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); DebugLoc DL = MI.getDebugLoc(); MachineFunction::iterator I = ++BB->getIterator(); MachineBasicBlock *HeadMBB = BB; MachineFunction *F = BB->getParent(); MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(I, IfFalseMBB); F->insert(I, TailMBB); // Move all remaining instructions to TailMBB. TailMBB->splice(TailMBB->begin(), HeadMBB, std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end()); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); // Set the successors for HeadMBB. HeadMBB->addSuccessor(IfFalseMBB); HeadMBB->addSuccessor(TailMBB); // Insert appropriate branch. unsigned LHS = MI.getOperand(1).getReg(); unsigned RHS = MI.getOperand(2).getReg(); auto CC = static_cast(MI.getOperand(3).getImm()); unsigned Opcode = getBranchOpcodeForIntCondCode(CC); BuildMI(HeadMBB, DL, TII.get(Opcode)) .addReg(LHS) .addReg(RHS) .addMBB(TailMBB); // IfFalseMBB just falls through to TailMBB. 
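// [Editor's sketch, not part of the patch] emitSplitF64Pseudo and
// emitBuildPairF64Pseudo above round-trip through a stack slot because RV32D
// has no direct move between a GPR pair and a 64-bit FPR. The bit-level
// operation they implement, in portable C++:
#include <cstdint>
#include <cstring>

double buildPairF64(uint32_t Lo, uint32_t Hi) {
  uint64_t Bits = (static_cast<uint64_t>(Hi) << 32) | Lo;
  double D;
  std::memcpy(&D, &Bits, sizeof(D)); // the emitted sw/sw + fld does this copy
  return D;
}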
IfFalseMBB->addSuccessor(TailMBB); // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(4).getReg()) .addMBB(HeadMBB) .addReg(MI.getOperand(5).getReg()) .addMBB(IfFalseMBB); MI.eraseFromParent(); // The pseudo instruction is gone now. return TailMBB; } // Calling Convention Implementation. // The expectations for frontend ABI lowering vary from target to target. // Ideally, an LLVM frontend would be able to avoid worrying about many ABI // details, but this is a longer term goal. For now, we simply try to keep the // role of the frontend as simple and well-defined as possible. The rules can // be summarised as: // * Never split up large scalar arguments. We handle them here. // * If a hardfloat calling convention is being used, and the struct may be // passed in a pair of registers (fp+fp, int+fp), and both registers are // available, then pass as two separate arguments. If either the GPRs or FPRs // are exhausted, then pass according to the rule below. // * If a struct could never be passed in registers or directly in a stack // slot (as it is larger than 2*XLEN and the floating point rules don't // apply), then pass it using a pointer with the byval attribute. // * If a struct is less than 2*XLEN, then coerce to either a two-element // word-sized array or a 2*XLEN scalar (depending on alignment). // * The frontend can determine whether a struct is returned by reference or // not based on its size and fields. If it will be returned by reference, the // frontend must modify the prototype so a pointer with the sret annotation is // passed as the first argument. This is not necessary for large scalar // returns. // * Struct return values and varargs should be coerced to structs containing // register-size fields in the same situations they would be for fixed // arguments. static const MCPhysReg ArgGPRs[] = { RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 }; // Pass a 2*XLEN argument that has been split into two XLEN values through // registers or the stack as necessary. static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2) { unsigned XLenInBytes = XLen / 8; if (unsigned Reg = State.AllocateReg(ArgGPRs)) { // At least one half can be passed via register. State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, VA1.getLocVT(), CCValAssign::Full)); } else { // Both halves must be passed on the stack, with proper alignment. unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign()); State.addLoc( CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), State.AllocateStack(XLenInBytes, StackAlign), VA1.getLocVT(), CCValAssign::Full)); State.addLoc(CCValAssign::getMem( ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, CCValAssign::Full)); return false; } if (unsigned Reg = State.AllocateReg(ArgGPRs)) { // The second half can also be passed via register. State.addLoc( CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); } else { // The second half is passed via the stack, without additional alignment. State.addLoc(CCValAssign::getMem( ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, CCValAssign::Full)); } return false; } // Implements the RISC-V calling convention. Returns true upon failure. 
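// [Editor's sketch, not part of the patch] A toy model (plain C++, not the
// LLVM API) of the CC_RISCVAssign2XLen policy above, assuming RV32: the two
// halves of a split 64-bit argument take GPRs while any remain, and once
// both halves go to memory the first stack slot is aligned to the full type.
#include <utility>

struct ArgLoc { bool InReg; unsigned RegOrOffset; };

std::pair<ArgLoc, ArgLoc> assignSplitI64(unsigned &NextGPR,
                                         unsigned &StackOffset) {
  const unsigned NumArgGPRs = 8; // a0-a7
  if (NextGPR == NumArgGPRs) {
    StackOffset = (StackOffset + 7) & ~7u; // align both halves as one i64
    ArgLoc Lo{false, StackOffset}, Hi{false, StackOffset + 4};
    StackOffset += 8;
    return {Lo, Hi};
  }
  ArgLoc Lo{true, NextGPR++};
  if (NextGPR != NumArgGPRs)
    return {Lo, ArgLoc{true, NextGPR++}};
  ArgLoc Hi{false, StackOffset}; // second half spills, XLen-aligned only
  StackOffset += 4;
  return {Lo, Hi};
}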
static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) { unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); assert(XLen == 32 || XLen == 64); MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; if (ValVT == MVT::f32) { LocVT = MVT::i32; LocInfo = CCValAssign::BCvt; } // Any return value split in to more than two values can't be returned // directly. if (IsRet && ValNo > 1) return true; // If this is a variadic argument, the RISC-V calling convention requires // that it is assigned an 'even' or 'aligned' register if it has 8-byte // alignment (RV32) or 16-byte alignment (RV64). An aligned register should // be used regardless of whether the original argument was split during // legalisation or not. The argument will not be passed by registers if the // original type is larger than 2*XLEN, so the register alignment rule does // not apply. unsigned TwoXLenInBytes = (2 * XLen) / 8; if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes && DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); // Skip 'odd' register if necessary. if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) State.AllocateReg(ArgGPRs); } SmallVectorImpl &PendingLocs = State.getPendingLocs(); SmallVectorImpl &PendingArgFlags = State.getPendingArgFlags(); assert(PendingLocs.size() == PendingArgFlags.size() && "PendingLocs and PendingArgFlags out of sync"); // Handle passing f64 on RV32D with a soft float ABI. if (XLen == 32 && ValVT == MVT::f64) { assert(!ArgFlags.isSplit() && PendingLocs.empty() && "Can't lower f64 if it is split"); // Depending on available argument GPRS, f64 may be passed in a pair of // GPRs, split between a GPR and the stack, or passed completely on the // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these // cases. unsigned Reg = State.AllocateReg(ArgGPRs); LocVT = MVT::i32; if (!Reg) { unsigned StackOffset = State.AllocateStack(8, 8); State.addLoc( CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); return false; } if (!State.AllocateReg(ArgGPRs)) State.AllocateStack(4, 4); State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } // Split arguments might be passed indirectly, so keep track of the pending // values. if (ArgFlags.isSplit() || !PendingLocs.empty()) { LocVT = XLenVT; LocInfo = CCValAssign::Indirect; PendingLocs.push_back( CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); PendingArgFlags.push_back(ArgFlags); if (!ArgFlags.isSplitEnd()) { return false; } } // If the split argument only had two elements, it should be passed directly // in registers or on the stack. if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); // Apply the normal calling convention rules to the first half of the // split argument. CCValAssign VA = PendingLocs[0]; ISD::ArgFlagsTy AF = PendingArgFlags[0]; PendingLocs.clear(); PendingArgFlags.clear(); return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags); } // Allocate to a register if possible, or else a stack slot. unsigned Reg = State.AllocateReg(ArgGPRs); unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8); // If we reach this point and PendingLocs is non-empty, we must be at the // end of a split argument that must be passed indirectly. 
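// [Editor's sketch, not part of the patch] The "skip odd register" rule in
// CC_RISCV above, seen from the source level on RV32 with a soft-float ABI:
// a variadic argument with 8-byte alignment must start in an even-numbered
// argument register.
#include <cstdio>

int main() {
  // fmt -> a0, 1 -> a1, 2 -> a2; 3.0 needs an aligned register pair, so a3
  // is skipped and the double travels in a4/a5.
  std::printf("%d %d %f\n", 1, 2, 3.0);
  return 0;
}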
if (!PendingLocs.empty()) { assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); for (auto &It : PendingLocs) { if (Reg) It.convertToReg(Reg); else It.convertToMem(StackOffset); State.addLoc(It); } PendingLocs.clear(); PendingArgFlags.clear(); return false; } assert(LocVT == XLenVT && "Expected an XLenVT at this stage"); if (Reg) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); } else { State.addLoc( CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); } return false; } void RISCVTargetLowering::analyzeInputArgs( MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl &Ins, bool IsRet) const { unsigned NumArgs = Ins.size(); FunctionType *FType = MF.getFunction().getFunctionType(); for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Ins[i].VT; ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; Type *ArgTy = nullptr; if (IsRet) ArgTy = FType->getReturnType(); else if (Ins[i].isOrigArg()) ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) { LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << '\n'); llvm_unreachable(nullptr); } } } void RISCVTargetLowering::analyzeOutputArgs( MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl &Outs, bool IsRet, CallLoweringInfo *CLI) const { unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; i++) { MVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << "\n"); llvm_unreachable(nullptr); } } } // The caller is responsible for loading the full value if the argument is // passed with CCValAssign::Indirect. static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL) { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); EVT LocVT = VA.getLocVT(); EVT ValVT = VA.getValVT(); SDValue Val; unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); switch (VA.getLocInfo()) { default: llvm_unreachable("Unexpected CCValAssign::LocInfo"); case CCValAssign::Full: case CCValAssign::Indirect: break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); break; } return Val; } // The caller is responsible for loading the full value if the argument is // passed with CCValAssign::Indirect. 
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL) { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); EVT LocVT = VA.getLocVT(); EVT ValVT = VA.getValVT(); EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, VA.getLocMemOffset(), /*Immutable=*/true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val; ISD::LoadExtType ExtType; switch (VA.getLocInfo()) { default: llvm_unreachable("Unexpected CCValAssign::LocInfo"); case CCValAssign::Full: case CCValAssign::Indirect: ExtType = ISD::NON_EXTLOAD; break; } Val = DAG.getExtLoad( ExtType, DL, LocVT, Chain, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); return Val; } static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL) { assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && "Unexpected VA"); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); if (VA.isMemLoc()) { // f64 is passed on the stack. int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); return DAG.getLoad(MVT::f64, DL, Chain, FIN, MachinePointerInfo::getFixedStack(MF, FI)); } assert(VA.isRegLoc() && "Expected register VA assignment"); unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); RegInfo.addLiveIn(VA.getLocReg(), LoVReg); SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); SDValue Hi; if (VA.getLocReg() == RISCV::X17) { // Second half of f64 is passed on the stack. int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, MachinePointerInfo::getFixedStack(MF, FI)); } else { // Second half of f64 is passed in another GPR. unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg); Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); } return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); } // Transform physical registers into virtual registers. SDValue RISCVTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { switch (CallConv) { default: report_fatal_error("Unsupported calling convention"); case CallingConv::C: case CallingConv::Fast: break; } MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); MVT XLenVT = Subtarget.getXLenVT(); unsigned XLenInBytes = Subtarget.getXLen() / 8; // Used with vargs to acumulate store chains. std::vector OutChains; // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; assert(VA.getLocVT() == XLenVT && "Unhandled argument type"); SDValue ArgValue; // Passing f64 on RV32D with a soft float ABI must be handled as a special // case. 
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL); else if (VA.isRegLoc()) ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL); else ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); if (VA.getLocInfo() == CCValAssign::Indirect) { // If the original argument was split and passed by reference (e.g. i128 // on RV32), we need to load all parts of it here (using the same // address). InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo())); unsigned ArgIndex = Ins[i].OrigArgIndex; assert(Ins[i].PartOffset == 0); while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { CCValAssign &PartVA = ArgLocs[i + 1]; unsigned PartOffset = Ins[i + 1].PartOffset; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, DAG.getIntPtrConstant(PartOffset, DL)); InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, MachinePointerInfo())); ++i; } continue; } InVals.push_back(ArgValue); } if (IsVarArg) { ArrayRef ArgRegs = makeArrayRef(ArgGPRs); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); const TargetRegisterClass *RC = &RISCV::GPRRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); RISCVMachineFunctionInfo *RVFI = MF.getInfo(); // Offset of the first variable argument from stack pointer, and size of // the vararg save area. For now, the varargs save area is either zero or // large enough to hold a0-a7. int VaArgOffset, VarArgsSaveSize; // If all registers are allocated, then all varargs must be passed on the // stack and we don't need to save any argregs. if (ArgRegs.size() == Idx) { VaArgOffset = CCInfo.getNextStackOffset(); VarArgsSaveSize = 0; } else { VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); VaArgOffset = -VarArgsSaveSize; } // Record the frame index of the first variable argument // which is a value necessary to VASTART. int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); RVFI->setVarArgsFrameIndex(FI); // If saving an odd number of registers then create an extra stack slot to // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures // offsets to even-numbered registered remain 2*XLEN-aligned. if (Idx % 2) { FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true); VarArgsSaveSize += XLenInBytes; } // Copy the integer registers that may have been used for passing varargs // to the vararg save area. for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += XLenInBytes) { const unsigned Reg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(ArgRegs[I], Reg); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo::getFixedStack(MF, FI)); cast(Store.getNode()) ->getMemOperand() ->setValue((Value *)nullptr); OutChains.push_back(Store); } RVFI->setVarArgsSaveSize(VarArgsSaveSize); } // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens for vararg functions. if (!OutChains.empty()) { OutChains.push_back(Chain); Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); } return Chain; } +/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. +/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 
+bool RISCVTargetLowering::IsEligibleForTailCallOptimization( + CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, + const SmallVector<CCValAssign, 16> &ArgLocs) const { + + auto &Callee = CLI.Callee; + auto CalleeCC = CLI.CallConv; + auto IsVarArg = CLI.IsVarArg; + auto &Outs = CLI.Outs; + auto &Caller = MF.getFunction(); + auto CallerCC = Caller.getCallingConv(); + + // Do not tail call opt functions with "disable-tail-calls" attribute. + if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true") + return false; + + // Exception-handling functions need a special set of instructions to + // indicate a return to the hardware. Tail-calling another function would + // probably break this. + // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This + // should be expanded as new function attributes are introduced. + if (Caller.hasFnAttribute("interrupt")) + return false; + + // Do not tail call opt functions with varargs. + if (IsVarArg) + return false; + + // Do not tail call opt if the stack is used to pass parameters. + if (CCInfo.getNextStackOffset() != 0) + return false; + + // Do not tail call opt if any parameters need to be passed indirectly. + // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are + // passed indirectly. The address of the value is then passed in a register, + // or put on the stack if no register is available. Passing indirectly often + // requires allocating stack space to hold the value, so the + // CCInfo.getNextStackOffset() != 0 check is not enough and we also need to + // check whether any CCValAssign in ArgLocs is marked + // CCValAssign::Indirect. + for (auto &VA : ArgLocs) + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + + // Do not tail call opt if either caller or callee uses struct return + // semantics. + auto IsCallerStructRet = Caller.hasStructRetAttr(); + auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); + if (IsCallerStructRet || IsCalleeStructRet) + return false; + + // Externally-defined functions with weak linkage should not be + // tail-called. The behaviour of branch instructions in this situation (as + // used for tail calls) is implementation-defined, so we cannot rely on the + // linker replacing the tail call with a return. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + if (GV->hasExternalWeakLinkage()) + return false; + } + + // The callee has to preserve all registers the caller needs to preserve. + const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); + if (CalleeCC != CallerCC) { + const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) + return false; + } + + // Byval parameters hand the function a pointer directly into the stack area + // we want to reuse during a tail call. Working around this *is* possible + // but less efficient and uglier in LowerCall. + for (auto &Arg : Outs) + if (Arg.Flags.isByVal()) + return false; + + return true; +} + // Lower a call to a callseq_start + CALL + callseq_end chain, and add input // and output parameter nodes.
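// [Editor's sketch, not part of the patch] Source-level view of the
// eligibility checks above (hypothetical functions):
extern int callee(int);
extern int log_va(const char *, ...);

int eligible(int x) {
  // Passes every check: fixed args fit in a0-a7, no sret/byval operands,
  // matching calling conventions. Expected to lower via PseudoTAIL, i.e. an
  // AUIPC+JALR pair through t1 that leaves ra untouched.
  return callee(x + 1);
}

int notEligible(const char *fmt, int x) {
  return log_va(fmt, x); // rejected by the IsVarArg check above
}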
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &DL = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; - CLI.IsTailCall = false; + bool &IsTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; EVT PtrVT = getPointerTy(DAG.getDataLayout()); MVT XLenVT = Subtarget.getXLenVT(); MachineFunction &MF = DAG.getMachineFunction(); // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); + // Check if it's really possible to do a tail call. + if (IsTailCall) + IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, + ArgLocs); + + if (IsTailCall) + ++NumTailCalls; + else if (CLI.CS && CLI.CS.isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); + // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = ArgCCInfo.getNextStackOffset(); // Create local copies for byval args SmallVector ByValArgs; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; if (!Flags.isByVal()) continue; SDValue Arg = OutVals[i]; unsigned Size = Flags.getByValSize(); unsigned Align = Flags.getByValAlign(); int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false); SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align, /*IsVolatile=*/false, /*AlwaysInline=*/false, - /*isTailCall=*/false, MachinePointerInfo(), + IsTailCall, MachinePointerInfo(), MachinePointerInfo()); ByValArgs.push_back(FIPtr); } - Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); + if (!IsTailCall) + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); // Copy argument values to their designated locations. SmallVector, 8> RegsToPass; SmallVector MemOpChains; SDValue StackPtr; for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue ArgValue = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // Handle passing f64 on RV32D with a soft float ABI as a special case. bool IsF64OnRV32DSoftABI = VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { SDValue SplitF64 = DAG.getNode( RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); SDValue Lo = SplitF64.getValue(0); SDValue Hi = SplitF64.getValue(1); unsigned RegLo = VA.getLocReg(); RegsToPass.push_back(std::make_pair(RegLo, Lo)); if (RegLo == RISCV::X17) { // Second half of f64 is passed on the stack. // Work out the address of the stack slot. if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); // Emit the store. MemOpChains.push_back( DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); } else { // Second half of f64 is passed in another GPR. unsigned RegHigh = RegLo + 1; RegsToPass.push_back(std::make_pair(RegHigh, Hi)); } continue; } // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way // as any other MemLoc. // Promote the value if needed. // For now, only handle fully promoted and indirect arguments. 
switch (VA.getLocInfo()) { case CCValAssign::Full: break; case CCValAssign::BCvt: ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), ArgValue); break; case CCValAssign::Indirect: { // Store the argument in a stack slot and pass its address. SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); int FI = cast(SpillSlot)->getIndex(); MemOpChains.push_back( DAG.getStore(Chain, DL, ArgValue, SpillSlot, MachinePointerInfo::getFixedStack(MF, FI))); // If the original argument was split (e.g. i128), we need // to store all parts of it here (and pass just one address). unsigned ArgIndex = Outs[i].OrigArgIndex; assert(Outs[i].PartOffset == 0); while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { SDValue PartValue = OutVals[i + 1]; unsigned PartOffset = Outs[i + 1].PartOffset; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, DAG.getIntPtrConstant(PartOffset, DL)); MemOpChains.push_back( DAG.getStore(Chain, DL, PartValue, Address, MachinePointerInfo::getFixedStack(MF, FI))); ++i; } ArgValue = SpillSlot; break; } default: llvm_unreachable("Unknown loc info!"); } // Use local copy if it is a byval arg. if (Flags.isByVal()) ArgValue = ByValArgs[j++]; if (VA.isRegLoc()) { // Queue up the argument copies and emit them at the end. RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); } else { assert(VA.isMemLoc() && "Argument not register or memory"); + assert(!IsTailCall && "Tail call not allowed if stack is used " + "for passing parameters"); // Work out the address of the stack slot. if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); // Emit the store. MemOpChains.push_back( DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); } } // Join the stores, which are independent of one another. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); SDValue Glue; // Build a sequence of copy-to-reg nodes, chained and glued together. for (auto &Reg : RegsToPass) { Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); Glue = Chain.getValue(1); } // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't // split it and then direct call can be matched by PseudoCALL. if (GlobalAddressSDNode *S = dyn_cast(Callee)) { Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0); } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0); } // The first call operand is the chain and the second is the target address. SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are // known live into the call. for (auto &Reg : RegsToPass) Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); - // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); + if (!IsTailCall) { + // Add a register mask operand representing the call-preserved registers. 
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + } // Glue the call to the argument copies, if any. if (Glue.getNode()) Ops.push_back(Glue); // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + if (IsTailCall) { + MF.getFrameInfo().setHasTailCall(); + return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); + } + Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); Glue = Chain.getValue(1); // Mark the end of the call, which is glued to the call itself. Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, DL, PtrVT, true), DAG.getConstant(0, DL, PtrVT, true), Glue, DL); Glue = Chain.getValue(1); // Assign locations to each value returned by this call. SmallVector RVLocs; CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); // Copy all of the result registers out of their specified physreg. for (auto &VA : RVLocs) { // Copy the value out SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); // Glue the RetValue to the end of the call sequence Chain = RetValue.getValue(1); Glue = RetValue.getValue(2); if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); Chain = RetValue2.getValue(1); Glue = RetValue2.getValue(2); RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, RetValue2); } switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: RetValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), RetValue); break; } InVals.push_back(RetValue); } return Chain; } bool RISCVTargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); for (unsigned i = 0, e = Outs.size(); i != e; ++i) { MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr)) return false; } return true; } static SDValue packIntoRegLoc(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL) { EVT LocVT = VA.getLocVT(); switch (VA.getLocInfo()) { default: llvm_unreachable("Unexpected CCValAssign::LocInfo"); case CCValAssign::Full: break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); break; } return Val; } SDValue RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { // Stores the assignment of the return value to a location. SmallVector RVLocs; // Info about the registers and stack slot. CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, nullptr); SDValue Glue; SmallVector RetOps(1, Chain); // Copy the result values into the output registers. 
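// [Editor's sketch, not part of the patch] The MVT::i32/MVT::f64 special
// cases in the call-result copying above and the return lowering below
// implement the RV32 soft-float convention that a double is returned in the
// a0/a1 GPR pair, which RISCVISD::BuildPairF64/SplitF64 then reassemble:
extern "C" double half(double x) {
  return x * 0.5; // result bits come back split across a0 (low) and a1 (high)
}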
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      unsigned RegLo = VA.getLocReg();
      unsigned RegHi = RegLo + 1;
      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = packIntoRegLoc(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  case RISCVISD::RET_FLAG:
    return "RISCVISD::RET_FLAG";
  case RISCVISD::CALL:
    return "RISCVISD::CALL";
  case RISCVISD::SELECT_CC:
    return "RISCVISD::SELECT_CC";
  case RISCVISD::BuildPairF64:
    return "RISCVISD::BuildPairF64";
  case RISCVISD::SplitF64:
    return "RISCVISD::SplitF64";
+  case RISCVISD::TAIL:
+    return "RISCVISD::TAIL";
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return std::make_pair(0U, &RISCV::GPRRegClass);
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 83a3bfdda4de..52bbfa1e75b7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -1,106 +1,111 @@
//===-- RISCVISelLowering.h - RISCV DAG Lowering Interface ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
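A note on the RV32 soft-float f64 return path handled in LowerReturn above: the value travels back in an adjacent GPR pair, VA.getLocReg() and VA.getLocReg() + 1. The following host-side model of what RISCVISD::BuildPairF64 and RISCVISD::SplitF64 compute is purely illustrative; the helper names are not from the patch.

#include <cstdint>
#include <cstring>

// Host-side model (illustration only): an f64 is carried in two 32-bit GPRs,
// low half first.
double buildPairF64(uint32_t Lo, uint32_t Hi) {
  uint64_t Bits = (uint64_t(Hi) << 32) | Lo; // what BuildPairF64 reassembles
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D;
}

void splitF64(double D, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));      // what SplitF64 produces
  Lo = uint32_t(Bits);
  Hi = uint32_t(Bits >> 32);
}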
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
#define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H

#include "RISCV.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
class RISCVSubtarget;
namespace RISCVISD {
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  RET_FLAG,
  CALL,
  SELECT_CC,
  BuildPairF64,
-  SplitF64
+  SplitF64,
+  TAIL
};
}

class RISCVTargetLowering : public TargetLowering {
  const RISCVSubtarget &Subtarget;

public:
  explicit RISCVTargetLowering(const TargetMachine &TM,
                               const RISCVSubtarget &STI);

  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;
  bool isLegalICmpImmediate(int64_t Imm) const override;
  bool isLegalAddImmediate(int64_t Imm) const override;
  bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
  bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  // Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  // This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *BB) const override;

  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

private:
  void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                        const SmallVectorImpl<ISD::InputArg> &Ins,
                        bool IsRet) const;
  void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         bool IsRet, CallLoweringInfo *CLI) const;
  // Lower incoming arguments, copy physregs into vregs
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;
  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;
  SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override {
    return true;
  }

  SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+
+  bool IsEligibleForTailCallOptimization(
+      CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
+      const SmallVector<CCValAssign, 16> &ArgLocs) const;
};
}

#endif
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 85652521d6bd..e62308e354a4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@
-1,448 +1,449 @@ //===-- RISCVInstrInfo.cpp - RISCV Instruction Information ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains the RISCV implementation of the TargetInstrInfo class. // //===----------------------------------------------------------------------===// #include "RISCVInstrInfo.h" #include "RISCV.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_CTOR_DTOR #include "RISCVGenInstrInfo.inc" using namespace llvm; RISCVInstrInfo::RISCVInstrInfo() : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP) {} unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { switch (MI.getOpcode()) { default: return 0; case RISCV::LB: case RISCV::LBU: case RISCV::LH: case RISCV::LHU: case RISCV::LW: case RISCV::FLW: case RISCV::LWU: case RISCV::LD: case RISCV::FLD: break; } if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) { FrameIndex = MI.getOperand(1).getIndex(); return MI.getOperand(0).getReg(); } return 0; } unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { switch (MI.getOpcode()) { default: return 0; case RISCV::SB: case RISCV::SH: case RISCV::SW: case RISCV::FSW: case RISCV::SD: case RISCV::FSD: break; } if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) { FrameIndex = MI.getOperand(0).getIndex(); return MI.getOperand(2).getReg(); } return 0; } void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DstReg, unsigned SrcReg, bool KillSrc) const { if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) { BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(0); return; } // FPR->FPR copies unsigned Opc; if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) Opc = RISCV::FSGNJ_S; else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) Opc = RISCV::FSGNJ_D; else llvm_unreachable("Impossible reg-to-reg copy"); BuildMI(MBB, MBBI, DL, get(Opc), DstReg) .addReg(SrcReg, getKillRegState(KillSrc)) .addReg(SrcReg, getKillRegState(KillSrc)); } void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool IsKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); unsigned Opcode; if (RISCV::GPRRegClass.hasSubClassEq(RC)) Opcode = RISCV::SW; else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) Opcode = RISCV::FSW; else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) Opcode = RISCV::FSD; else llvm_unreachable("Can't store this register to stack slot"); BuildMI(MBB, I, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(IsKill)) .addFrameIndex(FI) .addImm(0); } void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DstReg, int FI, const TargetRegisterClass *RC, const 
TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); unsigned Opcode; if (RISCV::GPRRegClass.hasSubClassEq(RC)) Opcode = RISCV::LW; else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) Opcode = RISCV::FLW; else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) Opcode = RISCV::FLD; else llvm_unreachable("Can't load this register from stack slot"); BuildMI(MBB, I, DL, get(Opcode), DstReg).addFrameIndex(FI).addImm(0); } void RISCVInstrInfo::movImm32(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DstReg, uint64_t Val, MachineInstr::MIFlag Flag) const { assert(isInt<32>(Val) && "Can only materialize 32-bit constants"); // TODO: If the value can be materialized using only one instruction, only // insert a single instruction. uint64_t Hi20 = ((Val + 0x800) >> 12) & 0xfffff; uint64_t Lo12 = SignExtend64<12>(Val); BuildMI(MBB, MBBI, DL, get(RISCV::LUI), DstReg) .addImm(Hi20) .setMIFlag(Flag); BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg) .addReg(DstReg, RegState::Kill) .addImm(Lo12) .setMIFlag(Flag); } // The contents of values added to Cond are not examined outside of // RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we // push BranchOpcode, Reg1, Reg2. static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target, SmallVectorImpl &Cond) { // Block ends with fall-through condbranch. assert(LastInst.getDesc().isConditionalBranch() && "Unknown conditional branch"); Target = LastInst.getOperand(2).getMBB(); Cond.push_back(MachineOperand::CreateImm(LastInst.getOpcode())); Cond.push_back(LastInst.getOperand(0)); Cond.push_back(LastInst.getOperand(1)); } static unsigned getOppositeBranchOpcode(int Opc) { switch (Opc) { default: llvm_unreachable("Unrecognized conditional branch"); case RISCV::BEQ: return RISCV::BNE; case RISCV::BNE: return RISCV::BEQ; case RISCV::BLT: return RISCV::BGE; case RISCV::BGE: return RISCV::BLT; case RISCV::BLTU: return RISCV::BGEU; case RISCV::BGEU: return RISCV::BLTU; } } bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { TBB = FBB = nullptr; Cond.clear(); // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); if (I == MBB.end() || !isUnpredicatedTerminator(*I)) return false; // Count the number of terminators and find the first unconditional or // indirect branch. MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end(); int NumTerminators = 0; for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J); J++) { NumTerminators++; if (J->getDesc().isUnconditionalBranch() || J->getDesc().isIndirectBranch()) { FirstUncondOrIndirectBr = J.getReverse(); } } // If AllowModify is true, we can erase any terminators after // FirstUncondOrIndirectBR. if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) { while (std::next(FirstUncondOrIndirectBr) != MBB.end()) { std::next(FirstUncondOrIndirectBr)->eraseFromParent(); NumTerminators--; } I = FirstUncondOrIndirectBr; } // We can't handle blocks that end in an indirect branch. if (I->getDesc().isIndirectBranch()) return true; // We can't handle blocks with more than 2 terminators. if (NumTerminators > 2) return true; // Handle a single unconditional branch. if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) { TBB = I->getOperand(0).getMBB(); return false; } // Handle a single conditional branch. 
if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) { parseCondBranch(*I, TBB, Cond); return false; } // Handle a conditional branch followed by an unconditional branch. if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() && I->getDesc().isUnconditionalBranch()) { parseCondBranch(*std::prev(I), TBB, Cond); FBB = I->getOperand(0).getMBB(); return false; } // Otherwise, we can't handle this. return true; } unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { if (BytesRemoved) *BytesRemoved = 0; MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); if (I == MBB.end()) return 0; if (!I->getDesc().isUnconditionalBranch() && !I->getDesc().isConditionalBranch()) return 0; // Remove the branch. I->eraseFromParent(); if (BytesRemoved) *BytesRemoved += getInstSizeInBytes(*I); I = MBB.end(); if (I == MBB.begin()) return 1; --I; if (!I->getDesc().isConditionalBranch()) return 1; // Remove the branch. I->eraseFromParent(); if (BytesRemoved) *BytesRemoved += getInstSizeInBytes(*I); return 2; } // Inserts a branch into the end of the specific MachineBasicBlock, returning // the number of instructions inserted. unsigned RISCVInstrInfo::insertBranch( MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { if (BytesAdded) *BytesAdded = 0; // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 3 || Cond.size() == 0) && "RISCV branch conditions have two components!"); // Unconditional branch. if (Cond.empty()) { MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB); if (BytesAdded) *BytesAdded += getInstSizeInBytes(MI); return 1; } // Either a one or two-way conditional branch. unsigned Opc = Cond[0].getImm(); MachineInstr &CondMI = *BuildMI(&MBB, DL, get(Opc)).add(Cond[1]).add(Cond[2]).addMBB(TBB); if (BytesAdded) *BytesAdded += getInstSizeInBytes(CondMI); // One-way conditional branch. if (!FBB) return 1; // Two-way conditional branch. MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB); if (BytesAdded) *BytesAdded += getInstSizeInBytes(MI); return 2; } unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &DestBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const { assert(RS && "RegScavenger required for long branching"); assert(MBB.empty() && "new block should be inserted for expanding unconditional branch"); assert(MBB.pred_size() == 1); MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const auto &TM = static_cast(MF->getTarget()); const auto &STI = MF->getSubtarget(); if (TM.isPositionIndependent() || STI.is64Bit()) report_fatal_error("Unable to insert indirect branch"); if (!isInt<32>(BrOffset)) report_fatal_error( "Branch offsets outside of the signed 32-bit range not supported"); // FIXME: A virtual register must be used initially, as the register // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch // uses the same workaround). 
unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); auto II = MBB.end(); MachineInstr &LuiMI = *BuildMI(MBB, II, DL, get(RISCV::LUI), ScratchReg) .addMBB(&DestBB, RISCVII::MO_HI); BuildMI(MBB, II, DL, get(RISCV::PseudoBRIND)) .addReg(ScratchReg, RegState::Kill) .addMBB(&DestBB, RISCVII::MO_LO); RS->enterBasicBlockEnd(MBB); unsigned Scav = RS->scavengeRegisterBackwards( RISCV::GPRRegClass, MachineBasicBlock::iterator(LuiMI), false, 0); MRI.replaceRegWith(ScratchReg, Scav); MRI.clearVirtRegs(); RS->setRegUsed(Scav); return 8; } bool RISCVInstrInfo::reverseBranchCondition( SmallVectorImpl &Cond) const { assert((Cond.size() == 3) && "Invalid branch condition!"); Cond[0].setImm(getOppositeBranchOpcode(Cond[0].getImm())); return false; } MachineBasicBlock * RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { assert(MI.getDesc().isBranch() && "Unexpected opcode!"); // The branch target is always the last operand. int NumOp = MI.getNumExplicitOperands(); return MI.getOperand(NumOp - 1).getMBB(); } bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp, int64_t BrOffset) const { // Ideally we could determine the supported branch offset from the // RISCVII::FormMask, but this can't be used for Pseudo instructions like // PseudoBR. switch (BranchOp) { default: llvm_unreachable("Unexpected opcode!"); case RISCV::BEQ: case RISCV::BNE: case RISCV::BLT: case RISCV::BGE: case RISCV::BLTU: case RISCV::BGEU: return isIntN(13, BrOffset); case RISCV::JAL: case RISCV::PseudoBR: return isIntN(21, BrOffset); } } unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); switch (Opcode) { default: { return get(Opcode).getSize(); } case TargetOpcode::EH_LABEL: case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: case TargetOpcode::DBG_VALUE: return 0; case RISCV::PseudoCALL: + case RISCV::PseudoTAIL: return 8; case TargetOpcode::INLINEASM: { const MachineFunction &MF = *MI.getParent()->getParent(); const auto &TM = static_cast(MF.getTarget()); return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *TM.getMCAsmInfo()); } } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 6781a62f4f41..4472fee4d3bd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1,729 +1,741 @@ //===-- RISCVInstrInfo.td - Target Description for RISCV ---*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the RISC-V instructions in TableGen format. // //===----------------------------------------------------------------------===// include "RISCVInstrFormats.td" //===----------------------------------------------------------------------===// // RISC-V specific DAG Nodes. 
//===----------------------------------------------------------------------===// def SDT_RISCVCall : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>; def SDT_RISCVCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def SDT_RISCVCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def SDT_RISCVSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>, SDTCisSameAs<0, 4>, SDTCisSameAs<4, 5>]>; def Call : SDNode<"RISCVISD::CALL", SDT_RISCVCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def CallSeqStart : SDNode<"ISD::CALLSEQ_START", SDT_RISCVCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; def CallSeqEnd : SDNode<"ISD::CALLSEQ_END", SDT_RISCVCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def RetFlag : SDNode<"RISCVISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def SelectCC : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC, [SDNPInGlue]>; +def Tail : SDNode<"RISCVISD::TAIL", SDT_RISCVCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. //===----------------------------------------------------------------------===// class ImmAsmOperand : AsmOperandClass { let Name = prefix # "Imm" # width # suffix; let RenderMethod = "addImmOperands"; let DiagnosticType = !strconcat("Invalid", Name); } class SImmAsmOperand : ImmAsmOperand<"S", width, suffix> { } class UImmAsmOperand : ImmAsmOperand<"U", width, suffix> { } def FenceArg : AsmOperandClass { let Name = "FenceArg"; let RenderMethod = "addFenceArgOperands"; let DiagnosticType = "InvalidFenceArg"; } def fencearg : Operand { let ParserMatchClass = FenceArg; let PrintMethod = "printFenceArg"; let DecoderMethod = "decodeUImmOperand<4>"; } def UImmLog2XLenAsmOperand : AsmOperandClass { let Name = "UImmLog2XLen"; let RenderMethod = "addImmOperands"; let DiagnosticType = "InvalidUImmLog2XLen"; } def uimmlog2xlen : Operand, ImmLeafis64Bit()) return isUInt<6>(Imm); return isUInt<5>(Imm); }]> { let ParserMatchClass = UImmLog2XLenAsmOperand; // TODO: should ensure invalid shamt is rejected when decoding. let DecoderMethod = "decodeUImmOperand<6>"; let MCOperandPredicate = [{ int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) return false; if (STI.getTargetTriple().isArch64Bit()) return isUInt<6>(Imm); return isUInt<5>(Imm); }]; } def uimm5 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5>; let DecoderMethod = "decodeUImmOperand<5>"; } def simm12 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = SImmAsmOperand<12>; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeSImmOperand<12>"; let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) return isInt<12>(Imm); return MCOp.isBareSymbolRef(); }]; } def uimm12 : Operand { let ParserMatchClass = UImmAsmOperand<12>; let DecoderMethod = "decodeUImmOperand<12>"; } // A 13-bit signed immediate where the least significant bit is zero. 
def simm13_lsb0 : Operand { let ParserMatchClass = SImmAsmOperand<13, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<13>"; let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) return isShiftedInt<12, 1>(Imm); return MCOp.isBareSymbolRef(); }]; } def uimm20 : Operand { let ParserMatchClass = UImmAsmOperand<20>; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeUImmOperand<20>"; let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) return isUInt<20>(Imm); return MCOp.isBareSymbolRef(); }]; } // A 21-bit signed immediate where the least significant bit is zero. def simm21_lsb0 : Operand { let ParserMatchClass = SImmAsmOperand<21, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<21>"; let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) return isShiftedInt<20, 1>(Imm); return MCOp.isBareSymbolRef(); }]; } def BareSymbol : AsmOperandClass { let Name = "BareSymbol"; let RenderMethod = "addImmOperands"; let DiagnosticType = "InvalidBareSymbol"; } // A bare symbol. def bare_symbol : Operand { let ParserMatchClass = BareSymbol; let MCOperandPredicate = [{ return MCOp.isBareSymbolRef(); }]; } // A parameterized register class alternative to i32imm/i64imm from Target.td. def ixlenimm : Operand; // Standalone (codegen-only) immleaf patterns. def simm32 : ImmLeaf(Imm);}]>; def simm32hi20 : ImmLeaf(Imm);}]>; // Addressing modes. // Necessary because a frameindex can't be matched directly in a pattern. def AddrFI : ComplexPattern; // Extract least significant 12 bits from an immediate value and sign extend // them. def LO12Sext : SDNodeXFormgetTargetConstant(SignExtend64<12>(N->getZExtValue()), SDLoc(N), N->getValueType(0)); }]>; // Extract the most significant 20 bits from an immediate value. Add 1 if bit // 11 is 1, to compensate for the low 12 bits in the matching immediate addi // or ld/st being negative. def HI20 : SDNodeXFormgetTargetConstant(((N->getZExtValue()+0x800) >> 12) & 0xfffff, SDLoc(N), N->getValueType(0)); }]>; //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class BranchCC_rri funct3, string opcodestr> : RVInstB { let isBranch = 1; let isTerminator = 1; } let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in class Load_ri funct3, string opcodestr> : RVInstI; // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction // encoding. 
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in class Store_rri funct3, string opcodestr> : RVInstS; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ALU_ri funct3, string opcodestr> : RVInstI; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class Shift_ri funct3, string opcodestr> : RVInstIShift; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ALU_rr funct7, bits<3> funct3, string opcodestr> : RVInstR; let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in class CSR_ir funct3, string opcodestr> : RVInstI; let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in class CSR_ii funct3, string opcodestr> : RVInstI; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ShiftW_ri funct3, string opcodestr> : RVInstIShiftW; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ALUW_rr funct7, bits<3> funct3, string opcodestr> : RVInstR; let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in class Priv funct7> : RVInstR; //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// let hasSideEffects = 0, isReMaterializable = 1, mayLoad = 0, mayStore = 0 in { def LUI : RVInstU; def AUIPC : RVInstU; let isCall = 1 in def JAL : RVInstJ; let isCall = 1 in def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12), "jalr", "$rd, $rs1, $imm12">; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 def BEQ : BranchCC_rri<0b000, "beq">; def BNE : BranchCC_rri<0b001, "bne">; def BLT : BranchCC_rri<0b100, "blt">; def BGE : BranchCC_rri<0b101, "bge">; def BLTU : BranchCC_rri<0b110, "bltu">; def BGEU : BranchCC_rri<0b111, "bgeu">; def LB : Load_ri<0b000, "lb">; def LH : Load_ri<0b001, "lh">; def LW : Load_ri<0b010, "lw">; def LBU : Load_ri<0b100, "lbu">; def LHU : Load_ri<0b101, "lhu">; def SB : Store_rri<0b000, "sb">; def SH : Store_rri<0b001, "sh">; def SW : Store_rri<0b010, "sw">; // ADDI isn't always rematerializable, but isReMaterializable will be used as // a hint which is verified in isReallyTriviallyReMaterializable. 
let isReMaterializable = 1 in def ADDI : ALU_ri<0b000, "addi">; def SLTI : ALU_ri<0b010, "slti">; def SLTIU : ALU_ri<0b011, "sltiu">; def XORI : ALU_ri<0b100, "xori">; def ORI : ALU_ri<0b110, "ori">; def ANDI : ALU_ri<0b111, "andi">; def SLLI : Shift_ri<0, 0b001, "slli">; def SRLI : Shift_ri<0, 0b101, "srli">; def SRAI : Shift_ri<1, 0b101, "srai">; def ADD : ALU_rr<0b0000000, 0b000, "add">; def SUB : ALU_rr<0b0100000, 0b000, "sub">; def SLL : ALU_rr<0b0000000, 0b001, "sll">; def SLT : ALU_rr<0b0000000, 0b010, "slt">; def SLTU : ALU_rr<0b0000000, 0b011, "sltu">; def XOR : ALU_rr<0b0000000, 0b100, "xor">; def SRL : ALU_rr<0b0000000, 0b101, "srl">; def SRA : ALU_rr<0b0100000, 0b101, "sra">; def OR : ALU_rr<0b0000000, 0b110, "or">; def AND : ALU_rr<0b0000000, 0b111, "and">; let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs), (ins fencearg:$pred, fencearg:$succ), "fence", "$pred, $succ"> { bits<4> pred; bits<4> succ; let rs1 = 0; let rd = 0; let imm12 = {0b0000,pred,succ}; } def FENCE_I : RVInstI<0b001, OPC_MISC_MEM, (outs), (ins), "fence.i", ""> { let rs1 = 0; let rd = 0; let imm12 = 0; } def ECALL : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ecall", ""> { let rs1 = 0; let rd = 0; let imm12 = 0; } def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> { let rs1 = 0; let rd = 0; let imm12 = 1; } } // hasSideEffects = 1, mayLoad = 0, mayStore = 0 def CSRRW : CSR_ir<0b001, "csrrw">; def CSRRS : CSR_ir<0b010, "csrrs">; def CSRRC : CSR_ir<0b011, "csrrc">; def CSRRWI : CSR_ii<0b101, "csrrwi">; def CSRRSI : CSR_ii<0b110, "csrrsi">; def CSRRCI : CSR_ii<0b111, "csrrci">; /// RV64I instructions let Predicates = [IsRV64] in { def LWU : Load_ri<0b110, "lwu">; def LD : Load_ri<0b011, "ld">; def SD : Store_rri<0b011, "sd">; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def ADDIW : RVInstI<0b000, OPC_OP_IMM_32, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12), "addiw", "$rd, $rs1, $imm12">; def SLLIW : ShiftW_ri<0, 0b001, "slliw">; def SRLIW : ShiftW_ri<0, 0b101, "srliw">; def SRAIW : ShiftW_ri<1, 0b101, "sraiw">; def ADDW : ALUW_rr<0b0000000, 0b000, "addw">; def SUBW : ALUW_rr<0b0100000, 0b000, "subw">; def SLLW : ALUW_rr<0b0000000, 0b001, "sllw">; def SRLW : ALUW_rr<0b0000000, 0b101, "srlw">; def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">; } // Predicates = [IsRV64] //===----------------------------------------------------------------------===// // Privileged instructions //===----------------------------------------------------------------------===// let isBarrier = 1, isReturn = 1, isTerminator = 1 in { def URET : Priv<"uret", 0b0000000> { let rd = 0; let rs1 = 0; let rs2 = 0b00010; } def SRET : Priv<"sret", 0b0001000> { let rd = 0; let rs1 = 0; let rs2 = 0b00010; } def MRET : Priv<"mret", 0b0011000> { let rd = 0; let rs1 = 0; let rs2 = 0b00010; } } // isBarrier = 1, isReturn = 1, isTerminator = 1 def WFI : Priv<"wfi", 0b0001000> { let rd = 0; let rs1 = 0; let rs2 = 0b00101; } let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1, GPR:$rs2), "sfence.vma", "$rs1, $rs2"> { let rd = 0; } //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) //===----------------------------------------------------------------------===// // TODO la // TODO lb lh lw // TODO RV64I: ld // TODO sb sh sw // TODO RV64I: sd def : InstAlias<"nop", (ADDI X0, X0, 0)>; // TODO li def : 
InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>; def : InstAlias<"not $rd, $rs", (XORI GPR:$rd, GPR:$rs, -1)>; def : InstAlias<"neg $rd, $rs", (SUB GPR:$rd, X0, GPR:$rs)>; let Predicates = [IsRV64] in { def : InstAlias<"negw $rd, $rs", (SUBW GPR:$rd, X0, GPR:$rs)>; def : InstAlias<"sext.w $rd, $rs", (ADDIW GPR:$rd, GPR:$rs, 0)>; } // Predicates = [IsRV64] def : InstAlias<"seqz $rd, $rs", (SLTIU GPR:$rd, GPR:$rs, 1)>; def : InstAlias<"snez $rd, $rs", (SLTU GPR:$rd, X0, GPR:$rs)>; def : InstAlias<"sltz $rd, $rs", (SLT GPR:$rd, GPR:$rs, X0)>; def : InstAlias<"sgtz $rd, $rs", (SLT GPR:$rd, X0, GPR:$rs)>; def : InstAlias<"beqz $rs, $offset", (BEQ GPR:$rs, X0, simm13_lsb0:$offset)>; def : InstAlias<"bnez $rs, $offset", (BNE GPR:$rs, X0, simm13_lsb0:$offset)>; def : InstAlias<"blez $rs, $offset", (BGE X0, GPR:$rs, simm13_lsb0:$offset)>; def : InstAlias<"bgez $rs, $offset", (BGE GPR:$rs, X0, simm13_lsb0:$offset)>; def : InstAlias<"bltz $rs, $offset", (BLT GPR:$rs, X0, simm13_lsb0:$offset)>; def : InstAlias<"bgtz $rs, $offset", (BLT X0, GPR:$rs, simm13_lsb0:$offset)>; // Always output the canonical mnemonic for the pseudo branch instructions. // The GNU tools emit the canonical mnemonic for the branch pseudo instructions // as well (e.g. "bgt" will be recognised by the assembler but never printed by // objdump). Match this behaviour by setting a zero weight. def : InstAlias<"bgt $rs, $rt, $offset", (BLT GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>; def : InstAlias<"ble $rs, $rt, $offset", (BGE GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>; def : InstAlias<"bgtu $rs, $rt, $offset", (BLTU GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>; def : InstAlias<"bleu $rs, $rt, $offset", (BGEU GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>; // "ret" has more weight since "ret" and "jr" alias the same "jalr" instruction. 
def : InstAlias<"j $offset", (JAL X0, simm21_lsb0:$offset)>; def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0:$offset)>; def : InstAlias<"jr $rs", (JALR X0, GPR:$rs, 0)>; def : InstAlias<"jalr $rs", (JALR X1, GPR:$rs, 0)>; def : InstAlias<"ret", (JALR X0, X1, 0), 2>; // TODO call // TODO tail def : InstAlias<"fence", (FENCE 0xF, 0xF)>; // 0xF == iorw // CSR Addresses: 0xC00 == cycle, 0xC01 == time, 0xC02 == instret // 0xC80 == cycleh, 0xC81 == timeh, 0xC82 == instreth def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, 0xC02, X0)>; def : InstAlias<"rdcycle $rd", (CSRRS GPR:$rd, 0xC00, X0)>; def : InstAlias<"rdtime $rd", (CSRRS GPR:$rd, 0xC01, X0)>; let Predicates = [IsRV32] in { def : InstAlias<"rdinstreth $rd", (CSRRS GPR:$rd, 0xC82, X0)>; def : InstAlias<"rdcycleh $rd", (CSRRS GPR:$rd, 0xC80, X0)>; def : InstAlias<"rdtimeh $rd", (CSRRS GPR:$rd, 0xC81, X0)>; } // Predicates = [IsRV32] def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, uimm12:$csr, X0)>; def : InstAlias<"csrw $csr, $rs", (CSRRW X0, uimm12:$csr, GPR:$rs)>; def : InstAlias<"csrs $csr, $rs", (CSRRS X0, uimm12:$csr, GPR:$rs)>; def : InstAlias<"csrc $csr, $rs", (CSRRC X0, uimm12:$csr, GPR:$rs)>; def : InstAlias<"csrwi $csr, $imm", (CSRRWI X0, uimm12:$csr, uimm5:$imm)>; def : InstAlias<"csrsi $csr, $imm", (CSRRSI X0, uimm12:$csr, uimm5:$imm)>; def : InstAlias<"csrci $csr, $imm", (CSRRCI X0, uimm12:$csr, uimm5:$imm)>; def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0)>; def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>; //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns // // Naming convention: For 'generic' pattern classes, we use the naming // convention PatTy1Ty2. For pattern classes which offer a more complex // expension, prefix the class name, e.g. BccPat. //===----------------------------------------------------------------------===// /// Generic pattern classes class PatGprGpr : Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>; class PatGprSimm12 : Pat<(OpNode GPR:$rs1, simm12:$imm12), (Inst GPR:$rs1, simm12:$imm12)>; class PatGprUimmLog2XLen : Pat<(OpNode GPR:$rs1, uimmlog2xlen:$shamt), (Inst GPR:$rs1, uimmlog2xlen:$shamt)>; /// Predicates def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ return isOrEquivalentToAdd(N); }]>; /// Immediates def : Pat<(simm12:$imm), (ADDI X0, simm12:$imm)>; def : Pat<(simm32hi20:$imm), (LUI (HI20 imm:$imm))>; def : Pat<(simm32:$imm), (ADDI (LUI (HI20 imm:$imm)), (LO12Sext imm:$imm))>; /// Simple arithmetic operations def : PatGprGpr; def : PatGprSimm12; def : PatGprGpr; def : PatGprGpr; def : PatGprSimm12; def : PatGprGpr; def : PatGprSimm12; def : PatGprGpr; def : PatGprSimm12; def : PatGprGpr; def : PatGprUimmLog2XLen; def : PatGprGpr; def : PatGprUimmLog2XLen; def : PatGprGpr; def : PatGprUimmLog2XLen; /// FrameIndex calculations def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12), (ADDI (i32 AddrFI:$Rs), simm12:$imm12)>; def : Pat<(IsOrAdd (i32 AddrFI:$Rs), simm12:$imm12), (ADDI (i32 AddrFI:$Rs), simm12:$imm12)>; /// Setcc def : PatGprGpr; def : PatGprSimm12; def : PatGprGpr; def : PatGprSimm12; // Define pattern expansions for setcc operations that aren't directly // handled by a RISC-V instruction. 
def : Pat<(seteq GPR:$rs1, GPR:$rs2),
          (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;
def : Pat<(setne GPR:$rs1, GPR:$rs2),
          (SLTU X0, (XOR GPR:$rs1, GPR:$rs2))>;
def : Pat<(setugt GPR:$rs1, GPR:$rs2), (SLTU GPR:$rs2, GPR:$rs1)>;
def : Pat<(setuge GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>;
def : Pat<(setule GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs2, GPR:$rs1), 1)>;
def : Pat<(setgt GPR:$rs1, GPR:$rs2), (SLT GPR:$rs2, GPR:$rs1)>;
def : Pat<(setge GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs1, GPR:$rs2), 1)>;
def : Pat<(setle GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>;

let usesCustomInserter = 1 in
class SelectCC_rrirr<RegisterClass valty, RegisterClass cmpty>
    : Pseudo<(outs valty:$dst),
             (ins cmpty:$lhs, cmpty:$rhs, ixlenimm:$imm,
              valty:$truev, valty:$falsev),
             [(set valty:$dst, (SelectCC cmpty:$lhs, cmpty:$rhs,
              (XLenVT imm:$imm), valty:$truev, valty:$falsev))]>;

def Select_GPR_Using_CC_GPR : SelectCC_rrirr<GPR, GPR>;

/// Branches and jumps

// Match `(brcond (CondOp ..), ..)` and lower to the appropriate RISC-V branch
// instruction.
class BccPat<PatFrag CondOp, RVInstB Inst>
    : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
          (Inst GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12)>;

def : BccPat<seteq, BEQ>;
def : BccPat<setne, BNE>;
def : BccPat<setlt, BLT>;
def : BccPat<setge, BGE>;
def : BccPat<setult, BLTU>;
def : BccPat<setuge, BGEU>;

class BccSwapPat<PatFrag CondOp, RVInstB InstBcc>
    : Pat<(brcond (i32 (CondOp GPR:$rs1, GPR:$rs2)), bb:$imm12),
          (InstBcc GPR:$rs2, GPR:$rs1, bb:$imm12)>;

// Condition codes that don't have matching RISC-V branch instructions, but
// are trivially supported by swapping the two input operands
def : BccSwapPat<setgt, BLT>;
def : BccSwapPat<setle, BGE>;
def : BccSwapPat<setugt, BLTU>;
def : BccSwapPat<setule, BGEU>;

// An extra pattern is needed for a brcond without a setcc (i.e. where the
// condition was calculated elsewhere).
def : Pat<(brcond GPR:$cond, bb:$imm12), (BNE GPR:$cond, X0, bb:$imm12)>;

let isBarrier = 1, isBranch = 1, isTerminator = 1 in
def PseudoBR : Pseudo<(outs), (ins simm21_lsb0:$imm20), [(br bb:$imm20)]>,
               PseudoInstExpansion<(JAL X0, simm21_lsb0:$imm20)>;

let isCall = 1, Defs=[X1] in
let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in
def PseudoBRIND : Pseudo<(outs), (ins GPR:$rs1, simm12:$imm12), []>,
                  PseudoInstExpansion<(JALR X0, GPR:$rs1, simm12:$imm12)>;

def : Pat<(brind GPR:$rs1), (PseudoBRIND GPR:$rs1, 0)>;
def : Pat<(brind (add GPR:$rs1, simm12:$imm12)),
          (PseudoBRIND GPR:$rs1, simm12:$imm12)>;

// PseudoCALL is a pseudo instruction which will eventually expand to auipc
// and jalr while encoding. This is desirable, as an auipc+jalr pair with
// R_RISCV_CALL and R_RISCV_RELAX relocations can be relaxed by the linker
// if the offset fits in a signed 21-bit immediate.
// Define AsmString to print "call" when compiling with the -S flag.
// Define isCodeGenOnly = 0 to support parsing the assembly "call" instruction.
let isCall = 1, Defs = [X1], isCodeGenOnly = 0 in
def PseudoCALL : Pseudo<(outs), (ins bare_symbol:$func),
                        [(Call tglobaladdr:$func)]> {
  let AsmString = "call\t$func";
}

def : Pat<(Call texternalsym:$func), (PseudoCALL texternalsym:$func)>;

let isCall = 1, Defs = [X1] in
def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rs1), [(Call GPR:$rs1)]>,
                         PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>;

let isBarrier = 1, isReturn = 1, isTerminator = 1 in
def PseudoRET : Pseudo<(outs), (ins), [(RetFlag)]>,
                PseudoInstExpansion<(JALR X0, X1, 0)>;

// PseudoTAIL is a pseudo instruction similar to PseudoCALL and will eventually
// expand to auipc and jalr while encoding.
// Define AsmString to print "tail" when compiling with the -S flag.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2],
-    hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0 in
+    isCodeGenOnly = 0 in
def PseudoTAIL : Pseudo<(outs), (ins bare_symbol:$dst), []> {
  let AsmString = "tail\t$dst";
}

+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2] in
+def PseudoTAILIndirect : Pseudo<(outs), (ins GPRTC:$rs1), [(Tail GPRTC:$rs1)]>,
+                         PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>;
+
+def : Pat<(Tail (iPTR tglobaladdr:$dst)),
+          (PseudoTAIL texternalsym:$dst)>;
+def : Pat<(Tail (iPTR texternalsym:$dst)),
+          (PseudoTAIL texternalsym:$dst)>;
+
/// Loads

multiclass LdPat<PatFrag LoadOp, RVInst Inst> {
  def : Pat<(LoadOp GPR:$rs1), (Inst GPR:$rs1, 0)>;
  def : Pat<(LoadOp AddrFI:$rs1), (Inst AddrFI:$rs1, 0)>;
  def : Pat<(LoadOp (add GPR:$rs1, simm12:$imm12)),
            (Inst GPR:$rs1, simm12:$imm12)>;
  def : Pat<(LoadOp (add AddrFI:$rs1, simm12:$imm12)),
            (Inst AddrFI:$rs1, simm12:$imm12)>;
  def : Pat<(LoadOp (IsOrAdd AddrFI:$rs1, simm12:$imm12)),
            (Inst AddrFI:$rs1, simm12:$imm12)>;
}

defm : LdPat<sextloadi8, LB>;
defm : LdPat<extloadi8, LB>;
defm : LdPat<sextloadi16, LH>;
defm : LdPat<extloadi16, LH>;
defm : LdPat<load, LW>;
defm : LdPat<zextloadi8, LBU>;
defm : LdPat<zextloadi16, LHU>;

/// Stores

multiclass StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy> {
  def : Pat<(StoreOp StTy:$rs2, GPR:$rs1), (Inst StTy:$rs2, GPR:$rs1, 0)>;
  def : Pat<(StoreOp StTy:$rs2, AddrFI:$rs1), (Inst StTy:$rs2, AddrFI:$rs1, 0)>;
  def : Pat<(StoreOp StTy:$rs2, (add GPR:$rs1, simm12:$imm12)),
            (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
  def : Pat<(StoreOp StTy:$rs2, (add AddrFI:$rs1, simm12:$imm12)),
            (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>;
  def : Pat<(StoreOp StTy:$rs2, (IsOrAdd AddrFI:$rs1, simm12:$imm12)),
            (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>;
}

defm : StPat<truncstorei8, SB, GPR>;
defm : StPat<truncstorei16, SH, GPR>;
defm : StPat<store, SW, GPR>;

/// Other pseudo-instructions

// Pessimistically assume the stack pointer will be clobbered
let Defs = [X2], Uses = [X2] in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(CallSeqStart timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKUP   : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(CallSeqEnd timm:$amt1, timm:$amt2)]>;
} // Defs = [X2], Uses = [X2]

//===----------------------------------------------------------------------===//
// Standard extensions
//===----------------------------------------------------------------------===//

include "RISCVInstrInfoM.td"
include "RISCVInstrInfoA.td"
include "RISCVInstrInfoF.td"
include "RISCVInstrInfoD.td"
include "RISCVInstrInfoC.td"
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 8e0469f06396..4be8ff9200e9 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -1,217 +1,230 @@
//===-- RISCVRegisterInfo.td - RISC-V Register defs --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
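One note ahead of the register definitions below: the new GPRTC class added further down in this file exists because an indirect tail call is emitted after the epilogue has restored the callee-saved registers, so the register carrying the call target must be caller-saved. A sketch of the resulting candidate set, mirroring the GPRTC sequences (t0-t2, a0-a7, t3-t6); the array name is hypothetical, illustrative C++ only:

#include "MCTargetDesc/RISCVMCTargetDesc.h" // for the RISCV::X* register enum

// Caller-saved GPRs usable to hold an indirect tail-call target, matching
// the GPRTC register class defined below (illustration only).
static const unsigned TailCallAddrRegs[] = {
    RISCV::X5,  RISCV::X6,  RISCV::X7,                // t0-t2
    RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,   // a0-a3
    RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17,   // a4-a7
    RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31};  // t3-t6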
// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Declarations that describe the RISC-V register files //===----------------------------------------------------------------------===// let Namespace = "RISCV" in { class RISCVReg Enc, string n, list alt = []> : Register { let HWEncoding{4-0} = Enc; let AltNames = alt; } class RISCVReg32 Enc, string n, list alt = []> : Register { let HWEncoding{4-0} = Enc; let AltNames = alt; } // Because RISCVReg64 register have AsmName and AltNames that alias with their // 32-bit sub-register, RISCVAsmParser will need to coerce a register number // from a RISCVReg32 to the equivalent RISCVReg64 when appropriate. def sub_32 : SubRegIndex<32>; class RISCVReg64 : Register<""> { let HWEncoding{4-0} = subreg.HWEncoding{4-0}; let SubRegs = [subreg]; let SubRegIndices = [sub_32]; let AsmName = subreg.AsmName; let AltNames = subreg.AltNames; } def ABIRegAltName : RegAltNameIndex; } // Namespace = "RISCV" // Integer registers // CostPerUse is set higher for registers that may not be compressible as they // are not part of GPRC, the most restrictive register class used by the // compressed instruction set. This will influence the greedy register // allocator to reduce the use of registers that can't be encoded in 16 bit // instructions. This affects register allocation even when compressed // instruction isn't targeted, we see no major negative codegen impact. let RegAltNameIndices = [ABIRegAltName] in { def X0 : RISCVReg<0, "x0", ["zero"]>, DwarfRegNum<[0]>; let CostPerUse = 1 in { def X1 : RISCVReg<1, "x1", ["ra"]>, DwarfRegNum<[1]>; def X2 : RISCVReg<2, "x2", ["sp"]>, DwarfRegNum<[2]>; def X3 : RISCVReg<3, "x3", ["gp"]>, DwarfRegNum<[3]>; def X4 : RISCVReg<4, "x4", ["tp"]>, DwarfRegNum<[4]>; def X5 : RISCVReg<5, "x5", ["t0"]>, DwarfRegNum<[5]>; def X6 : RISCVReg<6, "x6", ["t1"]>, DwarfRegNum<[6]>; def X7 : RISCVReg<7, "x7", ["t2"]>, DwarfRegNum<[7]>; } def X8 : RISCVReg<8, "x8", ["s0"]>, DwarfRegNum<[8]>; def X9 : RISCVReg<9, "x9", ["s1"]>, DwarfRegNum<[9]>; def X10 : RISCVReg<10,"x10", ["a0"]>, DwarfRegNum<[10]>; def X11 : RISCVReg<11,"x11", ["a1"]>, DwarfRegNum<[11]>; def X12 : RISCVReg<12,"x12", ["a2"]>, DwarfRegNum<[12]>; def X13 : RISCVReg<13,"x13", ["a3"]>, DwarfRegNum<[13]>; def X14 : RISCVReg<14,"x14", ["a4"]>, DwarfRegNum<[14]>; def X15 : RISCVReg<15,"x15", ["a5"]>, DwarfRegNum<[15]>; let CostPerUse = 1 in { def X16 : RISCVReg<16,"x16", ["a6"]>, DwarfRegNum<[16]>; def X17 : RISCVReg<17,"x17", ["a7"]>, DwarfRegNum<[17]>; def X18 : RISCVReg<18,"x18", ["s2"]>, DwarfRegNum<[18]>; def X19 : RISCVReg<19,"x19", ["s3"]>, DwarfRegNum<[19]>; def X20 : RISCVReg<20,"x20", ["s4"]>, DwarfRegNum<[20]>; def X21 : RISCVReg<21,"x21", ["s5"]>, DwarfRegNum<[21]>; def X22 : RISCVReg<22,"x22", ["s6"]>, DwarfRegNum<[22]>; def X23 : RISCVReg<23,"x23", ["s7"]>, DwarfRegNum<[23]>; def X24 : RISCVReg<24,"x24", ["s8"]>, DwarfRegNum<[24]>; def X25 : RISCVReg<25,"x25", ["s9"]>, DwarfRegNum<[25]>; def X26 : RISCVReg<26,"x26", ["s10"]>, DwarfRegNum<[26]>; def X27 : RISCVReg<27,"x27", ["s11"]>, DwarfRegNum<[27]>; def X28 : RISCVReg<28,"x28", ["t3"]>, DwarfRegNum<[28]>; def X29 : RISCVReg<29,"x29", ["t4"]>, DwarfRegNum<[29]>; def X30 : RISCVReg<30,"x30", ["t5"]>, DwarfRegNum<[30]>; def X31 : RISCVReg<31,"x31", ["t6"]>, DwarfRegNum<[31]>; } } def XLenVT : ValueTypeByHwMode<[RV32, RV64, DefaultMode], [i32, i64, i32]>; // The order of registers represents the preferred 
allocation sequence. // Registers are listed in the order caller-save, callee-save, specials. def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add (sequence "X%u", 10, 17), (sequence "X%u", 5, 7), (sequence "X%u", 28, 31), (sequence "X%u", 8, 9), (sequence "X%u", 18, 27), (sequence "X%u", 0, 4) )> { let RegInfos = RegInfoByHwMode< [RV32, RV64, DefaultMode], [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; } // The order of registers represents the preferred allocation sequence. // Registers are listed in the order caller-save, callee-save, specials. def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (add (sequence "X%u", 10, 17), (sequence "X%u", 5, 7), (sequence "X%u", 28, 31), (sequence "X%u", 8, 9), (sequence "X%u", 18, 27), (sequence "X%u", 1, 4) )> { let RegInfos = RegInfoByHwMode< [RV32, RV64, DefaultMode], [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; } def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (add (sequence "X%u", 10, 17), (sequence "X%u", 5, 7), (sequence "X%u", 28, 31), (sequence "X%u", 8, 9), (sequence "X%u", 18, 27), X1, X3, X4 )> { let RegInfos = RegInfoByHwMode< [RV32, RV64, DefaultMode], [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; } def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add (sequence "X%u", 10, 15), (sequence "X%u", 8, 9) )> { let RegInfos = RegInfoByHwMode< [RV32, RV64, DefaultMode], [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; } +// For indirect tail calls, we can't use callee-saved registers, as they are +// restored to the saved value before the tail call, which would clobber a call +// address. +def GPRTC : RegisterClass<"RISCV", [XLenVT], 32, (add + (sequence "X%u", 5, 7), + (sequence "X%u", 10, 17), + (sequence "X%u", 28, 31) + )> { + let RegInfos = RegInfoByHwMode< + [RV32, RV64, DefaultMode], + [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; +} + def SP : RegisterClass<"RISCV", [XLenVT], 32, (add X2)> { let RegInfos = RegInfoByHwMode< [RV32, RV64, DefaultMode], [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; } // Floating point registers let RegAltNameIndices = [ABIRegAltName] in { def F0_32 : RISCVReg32<0, "f0", ["ft0"]>, DwarfRegNum<[32]>; def F1_32 : RISCVReg32<1, "f1", ["ft1"]>, DwarfRegNum<[33]>; def F2_32 : RISCVReg32<2, "f2", ["ft2"]>, DwarfRegNum<[34]>; def F3_32 : RISCVReg32<3, "f3", ["ft3"]>, DwarfRegNum<[35]>; def F4_32 : RISCVReg32<4, "f4", ["ft4"]>, DwarfRegNum<[36]>; def F5_32 : RISCVReg32<5, "f5", ["ft5"]>, DwarfRegNum<[37]>; def F6_32 : RISCVReg32<6, "f6", ["ft6"]>, DwarfRegNum<[38]>; def F7_32 : RISCVReg32<7, "f7", ["ft7"]>, DwarfRegNum<[39]>; def F8_32 : RISCVReg32<8, "f8", ["fs0"]>, DwarfRegNum<[40]>; def F9_32 : RISCVReg32<9, "f9", ["fs1"]>, DwarfRegNum<[41]>; def F10_32 : RISCVReg32<10,"f10", ["fa0"]>, DwarfRegNum<[42]>; def F11_32 : RISCVReg32<11,"f11", ["fa1"]>, DwarfRegNum<[43]>; def F12_32 : RISCVReg32<12,"f12", ["fa2"]>, DwarfRegNum<[44]>; def F13_32 : RISCVReg32<13,"f13", ["fa3"]>, DwarfRegNum<[45]>; def F14_32 : RISCVReg32<14,"f14", ["fa4"]>, DwarfRegNum<[46]>; def F15_32 : RISCVReg32<15,"f15", ["fa5"]>, DwarfRegNum<[47]>; def F16_32 : RISCVReg32<16,"f16", ["fa6"]>, DwarfRegNum<[48]>; def F17_32 : RISCVReg32<17,"f17", ["fa7"]>, DwarfRegNum<[49]>; def F18_32 : RISCVReg32<18,"f18", ["fs2"]>, DwarfRegNum<[50]>; def F19_32 : RISCVReg32<19,"f19", ["fs3"]>, DwarfRegNum<[51]>; def F20_32 : RISCVReg32<20,"f20", ["fs4"]>, DwarfRegNum<[52]>; def F21_32 : RISCVReg32<21,"f21", ["fs5"]>, DwarfRegNum<[53]>; def F22_32 : RISCVReg32<22,"f22", ["fs6"]>, 
DwarfRegNum<[54]>; def F23_32 : RISCVReg32<23,"f23", ["fs7"]>, DwarfRegNum<[55]>; def F24_32 : RISCVReg32<24,"f24", ["fs8"]>, DwarfRegNum<[56]>; def F25_32 : RISCVReg32<25,"f25", ["fs9"]>, DwarfRegNum<[57]>; def F26_32 : RISCVReg32<26,"f26", ["fs10"]>, DwarfRegNum<[58]>; def F27_32 : RISCVReg32<27,"f27", ["fs11"]>, DwarfRegNum<[59]>; def F28_32 : RISCVReg32<28,"f28", ["ft8"]>, DwarfRegNum<[60]>; def F29_32 : RISCVReg32<29,"f29", ["ft9"]>, DwarfRegNum<[61]>; def F30_32 : RISCVReg32<30,"f30", ["ft10"]>, DwarfRegNum<[62]>; def F31_32 : RISCVReg32<31,"f31", ["ft11"]>, DwarfRegNum<[63]>; foreach Index = 0-31 in { def F#Index#_64 : RISCVReg64("F"#Index#"_32")>, DwarfRegNum<[!add(Index, 32)]>; } } // The order of registers represents the preferred allocation sequence, // meaning caller-save regs are listed before callee-save. def FPR32 : RegisterClass<"RISCV", [f32], 32, (add (sequence "F%u_32", 0, 7), (sequence "F%u_32", 10, 17), (sequence "F%u_32", 28, 31), (sequence "F%u_32", 8, 9), (sequence "F%u_32", 18, 27) )>; def FPR32C : RegisterClass<"RISCV", [f32], 32, (add (sequence "F%u_32", 10, 15), (sequence "F%u_32", 8, 9) )>; // The order of registers represents the preferred allocation sequence, // meaning caller-save regs are listed before callee-save. def FPR64 : RegisterClass<"RISCV", [f64], 64, (add (sequence "F%u_64", 0, 7), (sequence "F%u_64", 10, 17), (sequence "F%u_64", 28, 31), (sequence "F%u_64", 8, 9), (sequence "F%u_64", 18, 27) )>; def FPR64C : RegisterClass<"RISCV", [f64], 64, (add (sequence "F%u_64", 10, 15), (sequence "F%u_64", 8, 9) )>; diff --git a/llvm/test/CodeGen/RISCV/disable-tail-calls.ll b/llvm/test/CodeGen/RISCV/disable-tail-calls.ll new file mode 100644 index 000000000000..6ed5c2abc190 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/disable-tail-calls.ll @@ -0,0 +1,56 @@ +; Check that command line option "-disable-tail-calls" overrides function +; attribute "disable-tail-calls". 
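Why the command-line option can override the per-function attribute: when -disable-tail-calls is passed explicitly, llc's generic driver rewrites the "disable-tail-calls" IR attribute on each function before code generation, so the backend only ever consults the attribute. A sketch of that behaviour, assumed from LLVM's generic command-line plumbing rather than taken from this patch:

#include "llvm/IR/Function.h"

// Sketch: an explicit -disable-tail-calls[=...] replaces any
// "disable-tail-calls" attribute already present on the function.
void overrideDisableTailCalls(llvm::Function &F, bool OptionSeen,
                              bool OptionValue) {
  if (OptionSeen)
    F.addFnAttr("disable-tail-calls", OptionValue ? "true" : "false");
}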
+
+; RUN: llc < %s -mtriple=riscv32-unknown-elf \
+; RUN:   | FileCheck %s --check-prefixes=CALLER1,NOTAIL
+; RUN: llc < %s -mtriple=riscv32-unknown-elf -disable-tail-calls \
+; RUN:   | FileCheck %s --check-prefixes=CALLER1,NOTAIL
+; RUN: llc < %s -mtriple=riscv32-unknown-elf -disable-tail-calls=false \
+; RUN:   | FileCheck %s --check-prefixes=CALLER1,TAIL
+
+; RUN: llc < %s -mtriple=riscv32-unknown-elf \
+; RUN:   | FileCheck %s --check-prefixes=CALLER2,TAIL
+; RUN: llc < %s -mtriple=riscv32-unknown-elf -disable-tail-calls \
+; RUN:   | FileCheck %s --check-prefixes=CALLER2,NOTAIL
+; RUN: llc < %s -mtriple=riscv32-unknown-elf -disable-tail-calls=false \
+; RUN:   | FileCheck %s --check-prefixes=CALLER2,TAIL
+
+; RUN: llc < %s -mtriple=riscv32-unknown-elf \
+; RUN:   | FileCheck %s --check-prefixes=CALLER3,TAIL
+; RUN: llc < %s -mtriple=riscv32-unknown-elf -disable-tail-calls \
+; RUN:   | FileCheck %s --check-prefixes=CALLER3,NOTAIL
+; RUN: llc < %s -mtriple=riscv32-unknown-elf -disable-tail-calls=false \
+; RUN:   | FileCheck %s --check-prefixes=CALLER3,TAIL
+
+; CALLER1-LABEL: {{\_?}}caller1
+; CALLER2-LABEL: {{\_?}}caller2
+; CALLER3-LABEL: {{\_?}}caller3
+; NOTAIL-NOT: tail callee
+; NOTAIL: call callee
+; TAIL: tail callee
+; TAIL-NOT: call callee
+
+; Function with attribute #0 = { "disable-tail-calls"="true" }
+define i32 @caller1(i32 %a) #0 {
+entry:
+  %call = tail call i32 @callee(i32 %a)
+  ret i32 %call
+}
+
+; Function with attribute #1 = { "disable-tail-calls"="false" }
+define i32 @caller2(i32 %a) #1 {
+entry:
+  %call = tail call i32 @callee(i32 %a)
+  ret i32 %call
+}
+
+define i32 @caller3(i32 %a) {
+entry:
+  %call = tail call i32 @callee(i32 %a)
+  ret i32 %call
+}
+
+declare i32 @callee(i32)
+
+attributes #0 = { "disable-tail-calls"="true" }
+attributes #1 = { "disable-tail-calls"="false" }
diff --git a/llvm/test/CodeGen/RISCV/musttail-call.ll b/llvm/test/CodeGen/RISCV/musttail-call.ll
new file mode 100644
index 000000000000..551aa7245ce6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/musttail-call.ll
@@ -0,0 +1,20 @@
+; Check that we error out if a tail call is not possible but the call site is
+; marked as musttail.
+
+; RUN: not llc -mtriple riscv32-unknown-linux-gnu -o - %s \
+; RUN:   2>&1 | FileCheck %s
+; RUN: not llc -mtriple riscv32-unknown-elf -o - %s \
+; RUN:   2>&1 | FileCheck %s
+; RUN: not llc -mtriple riscv64-unknown-linux-gnu -o - %s \
+; RUN:   2>&1 | FileCheck %s
+; RUN: not llc -mtriple riscv64-unknown-elf -o - %s \
+; RUN:   2>&1 | FileCheck %s
+
+%struct.A = type { i32 }
+
+declare void @callee_musttail(%struct.A* sret %a)
+define void @caller_musttail(%struct.A* sret %a) {
+; CHECK: LLVM ERROR: failed to perform tail call elimination on a call site marked musttail
+entry:
+  musttail call void @callee_musttail(%struct.A* sret %a)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll
new file mode 100644
index 000000000000..2279e8c37792
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/tail-calls.ll
@@ -0,0 +1,148 @@
+; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s
+; RUN: llc -mtriple riscv32-unknown-elf -o - %s | FileCheck %s
+
+; Perform tail call optimization for global address.
+declare i32 @callee_tail(i32 %i)
+define i32 @caller_tail(i32 %i) {
+; CHECK-LABEL: caller_tail
+; CHECK: tail callee_tail
+entry:
+  %r = tail call i32 @callee_tail(i32 %i)
+  ret i32 %r
+}
+
+; Perform tail call optimization for external symbol.
+@dest = global [2 x i8] zeroinitializer +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) +define void @caller_extern(i8* %src) optsize { +entry: +; CHECK: caller_extern +; CHECK-NOT: call memcpy +; CHECK: tail memcpy + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @dest, i32 0, i32 0), i8* %src, i32 7, i1 false) + ret void +} + +; Perform indirect tail call optimization (for function pointer call). +declare void @callee_indirect1() +declare void @callee_indirect2() +define void @caller_indirect_tail(i32 %a) { +; CHECK-LABEL: caller_indirect_tail +; CHECK-NOT: call callee_indirect1 +; CHECK-NOT: call callee_indirect2 +; CHECK-NOT: tail callee_indirect1 +; CHECK-NOT: tail callee_indirect2 + +; CHECK: lui a0, %hi(callee_indirect2) +; CHECK-NEXT: addi a5, a0, %lo(callee_indirect2) +; CHECK-NEXT: jr a5 + +; CHECK: lui a0, %hi(callee_indirect1) +; CHECK-NEXT: addi a5, a0, %lo(callee_indirect1) +; CHECK-NEXT: jr a5 +entry: + %tobool = icmp eq i32 %a, 0 + %callee = select i1 %tobool, void ()* @callee_indirect1, void ()* @callee_indirect2 + tail call void %callee() + ret void +} + +; Do not tail call optimize functions with varargs. +declare i32 @callee_varargs(i32, ...) +define void @caller_varargs(i32 %a, i32 %b) { +; CHECK-LABEL: caller_varargs +; CHECK-NOT: tail callee_varargs +; CHECK: call callee_varargs +entry: + %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a) + ret void +} + +; Do not tail call optimize if stack is used to pass parameters. +declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) +define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) { +; CHECK-LABEL: caller_args +; CHECK-NOT: tail callee_args +; CHECK: call callee_args +entry: + %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) + ret i32 %r +} + +; Do not tail call optimize if parameters need to be passed indirectly. +declare i32 @callee_indirect_args(fp128 %a) +define void @caller_indirect_args() { +; CHECK-LABEL: caller_indirect_args +; CHECK-NOT: tail callee_indirect_args +; CHECK: call callee_indirect_args +entry: + %call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000) + ret void +} + +; Externally-defined functions with weak linkage should not be tail-called. +; The behaviour of branch instructions in this situation (as used for tail +; calls) is implementation-defined, so we cannot rely on the linker replacing +; the tail call with a return. +declare extern_weak void @callee_weak() +define void @caller_weak() { +; CHECK-LABEL: caller_weak +; CHECK-NOT: tail callee_weak +; CHECK: call callee_weak +entry: + tail call void @callee_weak() + ret void +} + +; Exception-handling functions need a special set of instructions to indicate a +; return to the hardware. Tail-calling another function would probably break +; this. +declare void @callee_irq() +define void @caller_irq() #0 { +; CHECK-LABEL: caller_irq +; CHECK-NOT: tail callee_irq +; CHECK: call callee_irq +entry: + tail call void @callee_irq() + ret void +} +attributes #0 = { "interrupt" } + +; Byval parameters hand the function a pointer directly into the stack area +; we want to reuse during a tail call. Do not tail call optimize functions with +; byval parameters. 
+declare i32 @callee_byval(i32** byval %a) +define i32 @caller_byval() { +; CHECK-LABEL: caller_byval +; CHECK-NOT: tail callee_byval +; CHECK: call callee_byval +entry: + %a = alloca i32* + %r = tail call i32 @callee_byval(i32** byval %a) + ret i32 %r +} + +; Do not tail call optimize if callee uses structret semantics. +%struct.A = type { i32 } +@a = global %struct.A zeroinitializer + +declare void @callee_struct(%struct.A* sret %a) +define void @caller_nostruct() { +; CHECK-LABEL: caller_nostruct +; CHECK-NOT: tail callee_struct +; CHECK: call callee_struct +entry: + tail call void @callee_struct(%struct.A* sret @a) + ret void +} + +; Do not tail call optimize if caller uses structret semantics. +declare void @callee_nostruct() +define void @caller_struct(%struct.A* sret %a) { +; CHECK-LABEL: caller_struct +; CHECK-NOT: tail callee_nostruct +; CHECK: call callee_nostruct +entry: + tail call void @callee_nostruct() + ret void +}
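Taken together, the tests above pin down when this backend may emit `tail`: never for varargs, stack-passed, byval, or indirectly-passed arguments, never when caller or callee uses sret, and never for extern_weak callees or interrupt handlers. A compact summary predicate follows; it is a hypothetical helper written for illustration, not the patch's RISCVTargetLowering::IsEligibleForTailCallOptimization, and its parameter names are assumptions.

#include "llvm/IR/Function.h"

// Hypothetical summary of the eligibility cases exercised by the tests above
// (illustration only).
static bool mayUseTailCall(const llvm::Function &Caller,
                           const llvm::Function *Callee, bool IsVarArg,
                           bool AnyArgPassedOnStack, bool AnyByValOrIndirectArg,
                           bool CallerOrCalleeUsesSRet) {
  // caller_varargs, caller_args, caller_byval, caller_indirect_args,
  // caller_struct / caller_nostruct:
  if (IsVarArg || AnyArgPassedOnStack || AnyByValOrIndirectArg ||
      CallerOrCalleeUsesSRet)
    return false;
  // caller_weak: a branch to an extern_weak symbol is implementation-defined.
  if (Callee && Callee->hasExternalWeakLinkage())
    return false;
  // caller_irq: interrupt handlers must return with a dedicated instruction.
  if (Caller.hasFnAttribute("interrupt"))
    return false;
  return true;
}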