Index: lib/Target/Mips/MicroMipsInstrInfo.td =================================================================== --- lib/Target/Mips/MicroMipsInstrInfo.td +++ lib/Target/Mips/MicroMipsInstrInfo.td @@ -480,6 +480,18 @@ let isIndirectBranch = 1; } +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, + hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in { + class TailCall_MM : + PseudoSE<(outs), (ins calltarget:$target), [], II_J>, + PseudoInstExpansion<(JumpInst jmptarget_mm:$target)>; + + class TailCallReg_MM : + PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>, + PseudoInstExpansion<(JRInst ResRO:$rs)>; +} + // Break16 and Sdbbp16 class BrkSdbbp16MM : MicroMipsInst16<(outs), (ins uimm4:$code_), @@ -980,6 +992,12 @@ def PREFX_MM : PrefetchIndexed<"prefx">, POOL32F_PREFX_FM_MM<0x15, 0x1A0>; } +let AdditionalPredicates = [InMicroMips] in { + def TAILCALL_MM : TailCall_MM, ISA_MIPS1_NOT_32R6_64R6; + def TAILCALLREG_MM : TailCallReg_MM, + ISA_MIPS1_NOT_32R6_64R6; +} + let DecoderNamespace = "MicroMips" in { def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware, RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6; Index: lib/Target/Mips/Mips32r6InstrInfo.td =================================================================== --- lib/Target/Mips/Mips32r6InstrInfo.td +++ lib/Target/Mips/Mips32r6InstrInfo.td @@ -847,6 +847,8 @@ def SEL_S : R6MMR6Rel, SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT; } def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6; +def TAILCALL_R6 : TailCall, ISA_MIPS32R6; +def TAILCALLREG_R6 : TailCallReg, GPR_32, ISA_MIPS32R6; //===----------------------------------------------------------------------===// // Index: lib/Target/Mips/Mips64InstrInfo.td =================================================================== --- lib/Target/Mips/Mips64InstrInfo.td +++ lib/Target/Mips/Mips64InstrInfo.td @@ -224,22 +224,24 @@ ISA_MIPS2_NOT_32R6_64R6; def SC64 : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, PTR_64, ISA_MIPS2_NOT_32R6_64R6; 
+def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>, PTR_64; } +def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM; + /// Jump and Branch Instructions let isCodeGenOnly = 1 in { - def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>; def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>; def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>; def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>; def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>; def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>; def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>; - def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM; def JALR64Pseudo : JumpLinkRegPseudo; - def TAILCALL64_R : TailCallReg; + def TAILCALLREG64 : TailCallReg, GPR_64, ISA_MIPS1_NOT_32R6_64R6; } + def PseudoReturn64 : PseudoReturnBase; def PseudoIndirectBranch64 : PseudoIndirectBranchBase; Index: lib/Target/Mips/Mips64r6InstrInfo.td =================================================================== --- lib/Target/Mips/Mips64r6InstrInfo.td +++ lib/Target/Mips/Mips64r6InstrInfo.td @@ -134,6 +134,10 @@ def JIALC64 : JIALC_ENC, JIALC64_DESC, ISA_MIPS64R6; def JIC64 : JIC_ENC, JIC64_DESC, ISA_MIPS64R6; } +def TAILCALL64_R6 : TailCall, ISA_MIPS64R6; +def TAILCALLREG64_R6 : TailCallReg, GPR_64, + ISA_MIPS64R6; + //===----------------------------------------------------------------------===// // // Instruction Aliases Index: lib/Target/Mips/MipsDelaySlotFiller.cpp =================================================================== --- lib/Target/Mips/MipsDelaySlotFiller.cpp +++ lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -543,6 +543,9 @@ // For given opcode returns opcode of corresponding instruction with short // delay slot. +// For the pseudo TAILCALL*_MM instructions return the short delay slot +// form.
Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range +// that is too short to make use of for tail calls. static int getEquivalentCallShort(int Opcode) { switch (Opcode) { case Mips::BGEZAL: @@ -555,6 +558,10 @@ return Mips::JALRS_MM; case Mips::JALR16_MM: return Mips::JALRS16_MM; + case Mips::TAILCALL_MM: + llvm_unreachable("Attempting to shorten the TAILCALL_MM pseudo!"); + case Mips::TAILCALLREG_MM: + return Mips::JR16_MM; default: llvm_unreachable("Unexpected call instruction for microMIPS."); } @@ -606,6 +613,12 @@ DSI->isCall()) { // If instruction in delay slot is 16b change opcode to // corresponding instruction with short delay slot. + + // TODO: Implement an instruction mapping table of 16bit opcodes to + // 32bit opcodes so that an instruction can be expanded. This would + // save 16 bits as a TAILCALL_MM pseudo requires a fullsized nop. + // TODO: Permit b16 when branching backwards to the same function + // if it is in range. DSI->setDesc(TII->get(getEquivalentCallShort(DSI->getOpcode()))); } continue; @@ -692,9 +705,14 @@ bool InMicroMipsMode = STI.inMicroMipsMode(); const MipsInstrInfo *TII = STI.getInstrInfo(); unsigned Opcode = (*Slot).getOpcode(); + // This is complicated by the tail call optimization. For non-PIC code + // there is only a 32bit sized unconditional branch which can be assumed + // to be able to reach the target. b16 only has a range of +/- 1 KB. + // It's entirely possible that the target function is reachable with b16 + // but we don't have enough information to make that decision.
if (InMicroMipsMode && TII->GetInstSizeInBytes(&(*CurrI)) == 2 && (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch || - Opcode == Mips::PseudoReturn)) + Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL)) continue; Filler = CurrI; Index: lib/Target/Mips/MipsInstrInfo.cpp =================================================================== --- lib/Target/Mips/MipsInstrInfo.cpp +++ lib/Target/Mips/MipsInstrInfo.cpp @@ -279,6 +279,7 @@ case Mips::JR: case Mips::PseudoReturn: case Mips::PseudoIndirectBranch: + case Mips::TAILCALLREG_MM: canUseShortMicroMipsCTI = true; break; } @@ -341,6 +342,8 @@ case Mips::JR: case Mips::PseudoReturn: case Mips::PseudoIndirectBranch: + case Mips::TAILCALLREG_MM: + case Mips::TAILCALLREG_R6: if (canUseShortMicroMipsCTI) return Mips::JRC16_MM; return Mips::JIC; @@ -349,6 +352,7 @@ case Mips::JR64: case Mips::PseudoReturn64: case Mips::PseudoIndirectBranch64: + case Mips::TAILCALLREG64_R6: return Mips::JIC64; case Mips::JALR64Pseudo: return Mips::JIALC64; Index: lib/Target/Mips/MipsInstrInfo.td =================================================================== --- lib/Target/Mips/MipsInstrInfo.td +++ lib/Target/Mips/MipsInstrInfo.td @@ -1877,8 +1877,11 @@ def BLTZALL : MMRel, BGEZAL_FT<"bltzall", brtarget, GPR32Opnd, 0>, BGEZAL_FM<0x12>, ISA_MIPS2_NOT_32R6_64R6; def BAL_BR : BAL_BR_Pseudo; -def TAILCALL : TailCall; -def TAILCALL_R : TailCallReg; + +let AdditionalPredicates = [NotInMicroMips] in { +def TAILCALL : TailCall, ISA_MIPS1_NOT_32R6_64R6; +def TAILCALLREG : TailCallReg, ISA_MIPS1_NOT_32R6_64R6, GPR_32; +} // Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64 // then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA. 
Index: lib/Target/Mips/MipsSEISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsSEISelLowering.cpp +++ lib/Target/Mips/MipsSEISelLowering.cpp @@ -27,8 +27,8 @@ #define DEBUG_TYPE "mips-isel" static cl::opt -EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, - cl::desc("MIPS: Enable tail calls."), cl::init(false)); +UseMipsTailCalls("mips-tail-calls", cl::Hidden, + cl::desc("MIPS: permit tail calls."), cl::init(true)); static cl::opt NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), cl::desc("Expand double precision loads and " @@ -1178,7 +1178,7 @@ bool MipsSETargetLowering::isEligibleForTailCallOptimization( const CCState &CCInfo, unsigned NextStackOffset, const MipsFunctionInfo &FI) const { - if (!EnableMipsTailCalls) + if (!UseMipsTailCalls) return false; // Exception has to be cleared with eret. Index: test/CodeGen/Mips/biggot.ll =================================================================== --- test/CodeGen/Mips/biggot.ll +++ test/CodeGen/Mips/biggot.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=mipsel -mxgot -relocation-model=pic < %s | FileCheck %s -check-prefix=O32 -; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+n64 -mxgot -relocation-model=pic < %s | \ +; RUN: llc -march=mips64el -mcpu=mips64r2 -mxgot -relocation-model=pic < %s | \ ; RUN: FileCheck %s -check-prefix=N64 @v0 = external global i32 @@ -15,9 +15,9 @@ ; N64: lui $[[R0:[0-9]+]], %got_hi(v0) ; N64: daddu $[[R1:[0-9]+]], $[[R0]], ${{[a-z0-9]+}} -; N64: ld ${{[0-9]+}}, %got_lo(v0)($[[R1]]) ; N64: lui $[[R2:[0-9]+]], %call_hi(foo0) ; N64: daddu $[[R3:[0-9]+]], $[[R2]], ${{[a-z0-9]+}} +; N64: ld ${{[0-9]+}}, %got_lo(v0)($[[R1]]) ; N64: ld ${{[0-9]+}}, %call_lo(foo0)($[[R3]]) %0 = load i32, i32* @v0, align 4 Index: test/CodeGen/Mips/brdelayslot.ll =================================================================== --- test/CodeGen/Mips/brdelayslot.ll +++ test/CodeGen/Mips/brdelayslot.ll @@ -22,7 +22,7 @@ ; None: nop ; None: .end - tail call 
void @foo2(i32 3) nounwind + call void @foo2(i32 3) nounwind ret void } @@ -37,7 +37,7 @@ ; Default: cvt.d.w %conv = sitofp i32 %a to double - tail call void @foo4(double %conv) nounwind + call void @foo4(double %conv) nounwind ret void } @@ -83,7 +83,7 @@ define void @foo6(float %a0, double %a1) nounwind { entry: - tail call void @foo7(double %a1, float %a0) nounwind + call void @foo7(double %a1, float %a0) nounwind ret void } @@ -101,7 +101,7 @@ entry: store i32 %a, i32* @g1, align 4 %0 = load void ()*, void ()** @foo9, align 4 - tail call void %0() nounwind + call void %0() nounwind %1 = load i32, i32* @g1, align 4 %add = add nsw i32 %1, %a ret i32 %add Index: test/CodeGen/Mips/cconv/reserved-space.ll =================================================================== --- test/CodeGen/Mips/cconv/reserved-space.ll +++ test/CodeGen/Mips/cconv/reserved-space.ll @@ -17,7 +17,7 @@ define void @reserved_space() nounwind { entry: - tail call void @foo() + call void @foo() ret void } Index: test/CodeGen/Mips/compactbranches/tailrecursion.ll =================================================================== --- /dev/null +++ test/CodeGen/Mips/compactbranches/tailrecursion.ll @@ -0,0 +1,39 @@ +; RUN: llc -march=mips -mcpu=mips32 -relocation-model=static < %s | FileCheck -check-prefix=STATICPRER6 %s +; RUN: llc -march=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=static < %s | FileCheck -check-prefix=STATICPRER6MM %s +; RUN: llc -march=mips -mcpu=mips32 -relocation-model=pic < %s | FileCheck -check-prefix=PICPRER6 %s +; RUN: llc -march=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic < %s | FileCheck -check-prefix=PICPRER6MM %s +; RUN: llc -march=mips -mcpu=mips32r6 -relocation-model=static < %s | FileCheck -check-prefix=STATICR6 %s +; RUN: llc -march=mips -mcpu=mips32r6 -relocation-model=pic < %s | FileCheck -check-prefix=PICR6 %s +; RUN: llc -march=mips -mcpu=mips32r6 -relocation-model=pic -mattr=+micromips < %s | FileCheck -check-prefix=PICR6MM %s 
+; RUN: llc -march=mips -mcpu=mips64 -target-abi n64 -relocation-model=pic < %s | FileCheck -check-prefix=PICPRER6-64 %s +; RUN: llc -march=mips -mcpu=mips64 -target-abi n64 -relocation-model=pic -mattr=+micromips< %s | FileCheck -check-prefix=PICPRER6MM-64 %s +; RUN: llc -march=mips -mcpu=mips64r6 -target-abi n64 -relocation-model=pic < %s | FileCheck -check-prefix=PICR6-64 %s +; RUN: llc -march=mips -mcpu=mips64r6 -target-abi n64 -relocation-model=pic -mattr=+micromips < %s | FileCheck -check-prefix=PICR6-64 %s + +; Test that wherever possible, the codegen pseudo instructions TAILCALL and +; TAILCALLREG for MIPS, microMIPS and the 64bit variants are compacted if +; possible when the delay slot cannot be filled. + +; Some PIC cases for 64bit save and restore the $gp pointer spuriously +; compared to the 32bit case, so test for 'jr' in those cases. + +declare i32 @g(i32) + +define i32 @f(i32 %a) { +; CHECK-LABEL: f: +; STATICPRER6: j +; STATICPRER6MM: j +; PICPRER6: jr $25 +; PICPRER6MM: jrc $25 +; PICPRER6MM-64: jr $25 +; PICPRER6-64: jr $25 + +; STATICR6: j +; PICR6: jrc $25 +; microMIPSR6 does not have delay slots, but uses the mnemonic 'jr' instead of 'jrc'.
+; PICR6MM: jr $25 +; PICR6-64: jr $25 + %1 = tail call i32 @g(i32 %a) + ret i32 %1 + +} Index: test/CodeGen/Mips/fastcc.ll =================================================================== --- test/CodeGen/Mips/fastcc.ll +++ test/CodeGen/Mips/fastcc.ll @@ -290,7 +290,7 @@ ; NOODDSPREG-DAG: lwc1 $f18, 36($[[R0]]) ; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], 40($[[R0]]) -; NOODDSPREG-DAG: swc1 $[[F0]], 0($sp) +; NOODDSPREG-DAG: swc1 $[[F0]], 8($sp) %0 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4 %1 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 1), align 4 Index: test/CodeGen/Mips/gpreg-lazy-binding.ll =================================================================== --- test/CodeGen/Mips/gpreg-lazy-binding.ll +++ test/CodeGen/Mips/gpreg-lazy-binding.ll @@ -6,7 +6,7 @@ ; CHECK: jalr $25 ; CHECK: nop ; CHECK-NOT: move $gp -; CHECK: jalr $25 +; CHECK: jr $25 define void @f0() nounwind { entry: Index: test/CodeGen/Mips/i64arg.ll =================================================================== --- test/CodeGen/Mips/i64arg.ll +++ test/CodeGen/Mips/i64arg.ll @@ -2,14 +2,14 @@ define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind { entry: -; CHECK-DAG: lw $[[R2:[0-9]+]], 80($sp) -; CHECK-DAG: lw $[[R3:[0-9]+]], 84($sp) +; CHECK-DAG: lw $[[R2:[0-9]+]], 64($sp) +; CHECK-DAG: lw $[[R3:[0-9]+]], 68($sp) ; CHECK-DAG: move $[[R1:[0-9]+]], $5 ; CHECK-DAG: move $[[R0:[0-9]+]], $4 ; CHECK-DAG: ori $6, ${{[0-9]+}}, 3855 ; CHECK-DAG: ori $7, ${{[0-9]+}}, 22136 ; CHECK-DAG: lw $25, %call16(ff1) -; CHECK: jalr +; CHECK: jalr $25 tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind ; CHECK-DAG: lw $25, %call16(ff2) ; CHECK-DAG: move $4, $[[R2]] @@ -18,11 +18,11 @@ tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind %sub = add nsw i32 %i, -1 ; CHECK-DAG: lw $25, %call16(ff3) -; CHECK-DAG: sw $[[R1]], 28($sp) -; CHECK-DAG: sw $[[R0]], 24($sp) +; CHECK-DAG: sw 
$[[R1]], 76($sp) +; CHECK-DAG: sw $[[R0]], 72($sp) ; CHECK-DAG: move $6, $[[R2]] ; CHECK-DAG: move $7, $[[R3]] -; CHECK: jalr $25 +; CHECK: jr $25 tail call void @ff3(i32 %i, i64 %ll, i32 %sub, i64 %ll1) nounwind ret void } Index: test/CodeGen/Mips/indirectcall.ll =================================================================== --- test/CodeGen/Mips/indirectcall.ll +++ test/CodeGen/Mips/indirectcall.ll @@ -2,7 +2,7 @@ define void @foo0(void (i32)* nocapture %f1) nounwind { entry: -; CHECK: jalr $25 +; CHECK: jr $25 tail call void %f1(i32 13) nounwind ret void } Index: test/CodeGen/Mips/lazy-binding.ll =================================================================== --- test/CodeGen/Mips/lazy-binding.ll +++ test/CodeGen/Mips/lazy-binding.ll @@ -30,7 +30,7 @@ ; CHECK: lw $25, %call16(foo2)(${{[0-9]+}}) ; CHECK: jalr $25 ; CHECK: lw $25, %call16(foo2)(${{[0-9]+}}) -; CHECK: jalr $25 +; CHECK: jr $25 define void @foo1() { entry: Index: test/CodeGen/Mips/llvm-ir/call.ll =================================================================== --- test/CodeGen/Mips/llvm-ir/call.ll +++ test/CodeGen/Mips/llvm-ir/call.ll @@ -1,18 +1,17 @@ ; Test the 'call' instruction and the tailcall variant. 
-; FIXME: We should remove the need for -enable-mips-tail-calls -; RUN: llc -march=mips -mcpu=mips32 -relocation-model=pic -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=NOT-R6C -; RUN: llc -march=mips -mcpu=mips32r2 -relocation-model=pic -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=NOT-R6C -; RUN: llc -march=mips -mcpu=mips32r3 -relocation-model=pic -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=NOT-R6C -; RUN: llc -march=mips -mcpu=mips32r5 -relocation-model=pic -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=NOT-R6C -; RUN: llc -march=mips -mcpu=mips32r6 -relocation-model=pic -disable-mips-delay-filler -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=R6C -; RUN: llc -march=mips -mcpu=mips32r6 -relocation-model=pic -mattr=+fp64,+nooddspreg -disable-mips-delay-filler -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=R6C -; RUN: llc -march=mips64 -mcpu=mips4 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NOT-R6C -; RUN: llc -march=mips64 -mcpu=mips64 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NOT-R6C -; RUN: llc -march=mips64 -mcpu=mips64r2 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NOT-R6C -; RUN: llc -march=mips64 -mcpu=mips64r3 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NOT-R6C -; RUN: llc -march=mips64 -mcpu=mips64r5 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NOT-R6C -; RUN: llc -march=mips64 -mcpu=mips64r6 -disable-mips-delay-filler -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=R6C +; RUN: llc 
-march=mips -mcpu=mips32 -relocation-model=pic < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=NOT-R6C +; RUN: llc -march=mips -mcpu=mips32r2 -relocation-model=pic < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=NOT-R6C +; RUN: llc -march=mips -mcpu=mips32r3 -relocation-model=pic < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=NOT-R6C +; RUN: llc -march=mips -mcpu=mips32r5 -relocation-model=pic < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=NOT-R6C +; RUN: llc -march=mips -mcpu=mips32r6 -relocation-model=pic -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=R6C +; RUN: llc -march=mips -mcpu=mips32r6 -relocation-model=pic -mattr=+fp64,+nooddspreg -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32 -check-prefix=R6C +; RUN: llc -march=mips64 -mcpu=mips4 < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NOT-R6C +; RUN: llc -march=mips64 -mcpu=mips64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NOT-R6C +; RUN: llc -march=mips64 -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NOT-R6C +; RUN: llc -march=mips64 -mcpu=mips64r3 < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NOT-R6C +; RUN: llc -march=mips64 -mcpu=mips64r5 < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=NOT-R6C +; RUN: llc -march=mips64 -mcpu=mips64r6 -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64 -check-prefix=R6C declare void @extern_void_void() declare i32 @extern_i32_void() @@ -76,7 +75,8 @@ ; N64: ld $[[TGT:[0-9]+]], %call16(extern_void_void)($gp) -; ALL: jr $[[TGT]] +; NOT-R6C: jr $[[TGT]] +; R6C: jrc $[[TGT]] musttail call void @extern_void_void() ret void @@ -89,7 +89,8 @@ ; N64: ld $[[TGT:[0-9]+]], %call16(extern_i32_void)($gp) -; ALL: jr $[[TGT]] +; NOT-R6C: jr $[[TGT]] +; 
R6C: jrc $[[TGT]] %1 = musttail call i32 @extern_i32_void() ret i32 %1 @@ -102,7 +103,8 @@ ; N64: ld $[[TGT:[0-9]+]], %call16(extern_float_void)($gp) -; ALL: jr $[[TGT]] +; NOT-R6C: jr $[[TGT]] +; R6C: jrc $[[TGT]] %1 = musttail call float @extern_float_void() ret float %1 @@ -154,7 +156,8 @@ ; ALL-LABEL: tail_indirect_call_void_void: ; ALL: move $25, $4 -; ALL: jr $25 +; NOT-R6C: jr $[[TGT]] +; R6C: jrc $[[TGT]] tail call void %addr() ret void @@ -164,7 +167,8 @@ ; ALL-LABEL: tail_indirect_call_i32_void: ; ALL: move $25, $4 -; ALL: jr $25 +; NOT-R6C: jr $[[TGT]] +; R6C: jrc $[[TGT]] %1 = tail call i32 %addr() ret i32 %1 @@ -174,7 +178,8 @@ ; ALL-LABEL: tail_indirect_call_float_void: ; ALL: move $25, $4 -; ALL: jr $25 +; NOT-R6C: jr $[[TGT]] +; R6C: jrc $[[TGT]] %1 = tail call float %addr() ret float %1 @@ -188,7 +193,8 @@ ; ALL-LABEL: thunk_undef_double: ; O32: # implicit-def: %A2 ; O32: # implicit-def: %A3 -; ALL: jr $25 +; NOT-R6C: jr $[[TGT]] +; R6C: jrc $[[TGT]] tail call void @undef_double(i32 undef, double undef) #8 ret void Index: test/CodeGen/Mips/nacl-branch-delay.ll =================================================================== --- test/CodeGen/Mips/nacl-branch-delay.ll +++ test/CodeGen/Mips/nacl-branch-delay.ll @@ -44,7 +44,7 @@ define void @test2() { store i32 1, i32* @x, align 4 - tail call void @f2() + call void @f2() ret void Index: test/CodeGen/Mips/tailcall.ll =================================================================== --- test/CodeGen/Mips/tailcall.ll +++ test/CodeGen/Mips/tailcall.ll @@ -1,13 +1,32 @@ -; RUN: llc -march=mipsel -relocation-model=pic -enable-mips-tail-calls \ +; RUN: llc -march=mipsel -relocation-model=pic \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC32 -; RUN: llc -march=mipsel -relocation-model=static -enable-mips-tail-calls \ +; RUN: llc -march=mipsel -relocation-model=static \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=STATIC32 -; RUN: llc -march=mips64el -mcpu=mips64r2 
-enable-mips-tail-calls \ +; RUN: llc -march=mips64el -mcpu=mips64r2 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=N64 ; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic \ -; RUN: -enable-mips-tail-calls -verify-machineinstrs < %s | \ +; RUN: -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=PIC16 +; RUN: llc -march=mipsel -relocation-model=pic -mattr=+micromips < %s | \ +; RUN: FileCheck %s -check-prefix=PIC32 +; RUN: llc -march=mipsel -relocation-model=static -mattr=+micromips \ +; RUN: < %s | FileCheck %s -check-prefix=STATIC32 + +; RUN: llc -march=mipsel -relocation-model=pic -mcpu=mips32r6 < %s | \ +; RUN: FileCheck %s -check-prefix=PIC32 +; RUN: llc -march=mipsel -relocation-model=static -mcpu=mips32r6 \ +; RUN: < %s | FileCheck %s -check-prefix=STATIC32 +; RUN: llc -march=mips64el -mcpu=mips64r6 \ +; RUN: < %s | FileCheck %s -check-prefix=N64 + +; RUN: llc -march=mipsel -relocation-model=pic -mcpu=mips32r6 -mattr=+micromips \ +; RUN: < %s | FileCheck %s -check-prefix=PIC32 +; RUN: llc -march=mipsel -relocation-model=static -mcpu=mips32r6 \ +; RUN: -mattr=+micromips < %s | FileCheck %s -check-prefix=STATIC32 +; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=+micromips < %s \ +; RUN: | FileCheck %s -check-prefix=N64 + @g0 = common global i32 0, align 4 @g1 = common global i32 0, align 4 @g2 = common global i32 0, align 4 @@ -74,13 +93,13 @@ define i32 @caller5() nounwind readonly { entry: ; PIC32: .ent caller5 -; PIC32-NOT: jalr +; PIC32-NOT: jalr $25 ; PIC32: .end caller5 ; STATIC32: .ent caller5 ; STATIC32-NOT: jal ; STATIC32: .end caller5 ; N64: .ent caller5 -; N64-NOT: jalr +; N64-NOT: jalr $25 ; N64: .end caller5 ; PIC16: .ent caller5 ; PIC16: jalrc @@ -118,6 +137,19 @@ define i32 @caller8_0() nounwind { entry: +; PIC32: .ent caller8_0 +; PIC32: jr +; PIC32: .end caller8_0 +; STATIC32: .ent caller8_0 +; STATIC32: j +; STATIC32: .end caller8_0 +; N64: .ent caller8_0 +; N64-NOT: jalr $25 +; N64: .end caller8_0 +; 
PIC16: .ent caller8_0 +; PIC16: jalrc +; PIC16: .end caller8_0 + %call = tail call fastcc i32 @caller8_1() ret i32 %call } @@ -131,7 +163,7 @@ ; STATIC32: jal ; STATIC32: .end caller8_1 ; N64: .ent caller8_1 -; N64-NOT: jalr +; N64-NOT: jalr $25 ; N64: .end caller8_1 ; PIC16: .ent caller8_1 ; PIC16: jalrc @@ -149,6 +181,18 @@ define i32 @caller9_0() nounwind { entry: +; PIC32: .ent caller9_0 +; PIC32: jr +; PIC32: .end caller9_0 +; STATIC32: .ent caller9_0 +; STATIC32: j +; STATIC32: .end caller9_0 +; N64: .ent caller9_0 +; N64-NOT: jalr $25 +; N64: .end caller9_0 +; PIC16: .ent caller9_0 +; PIC16: jalrc +; PIC16: .end caller9_0 %call = tail call fastcc i32 @caller9_1() ret i32 %call } @@ -177,11 +221,11 @@ define i32 @caller10(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) nounwind { entry: ; PIC32: .ent caller10 -; PIC32-NOT: jalr +; PIC32-NOT: jalr $25 ; STATIC32: .ent caller10 ; STATIC32-NOT: jal ; N64: .ent caller10 -; N64-NOT: jalr +; N64-NOT: jalr $25 ; PIC16: .ent caller10 ; PIC16: jalrc @@ -236,7 +280,7 @@ ; STATIC32: .ent caller13 ; STATIC32-NOT: jal ; N64: .ent caller13 -; N64-NOT: jalr +; N64-NOT: jalr $25 ; PIC16: .ent caller13 ; PIC16: jalrc Index: test/CodeGen/Mips/tailrecursion.ll =================================================================== --- /dev/null +++ test/CodeGen/Mips/tailrecursion.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=mips -mcpu=mips32 -relocation-model=static < %s | FileCheck -check-prefix=STATICPRER6 %s +; RUN: llc -march=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=static < %s | FileCheck -check-prefix=STATICPRER6MM %s +; RUN: llc -march=mips -mcpu=mips32 -relocation-model=pic < %s | FileCheck -check-prefix=PICPRER6 %s +; RUN: llc -march=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic < %s | FileCheck -check-prefix=PICPRER6MM %s +; RUN: llc -march=mips -mcpu=mips32r6 -relocation-model=static < %s | FileCheck -check-prefix=STATICR6 %s +; RUN: llc -march=mips -mcpu=mips32r6 
-relocation-model=pic < %s | FileCheck -check-prefix=PICR6 %s +; RUN: llc -march=mips -mcpu=mips32r6 -relocation-model=pic -mattr=+micromips < %s | FileCheck -check-prefix=PICR6MM %s +; RUN: llc -march=mips -mcpu=mips64 -target-abi n64 -relocation-model=pic < %s | FileCheck -check-prefix=PICPRER6-64 %s +; RUN: llc -march=mips -mcpu=mips64 -target-abi n64 -relocation-model=pic -mattr=+micromips< %s | FileCheck -check-prefix=PICPRER6MM-64 %s +; RUN: llc -march=mips -mcpu=mips64r6 -target-abi n64 -relocation-model=pic < %s | FileCheck -check-prefix=PICR6-64 %s +; RUN: llc -march=mips -mcpu=mips64r6 -target-abi n64 -relocation-model=pic -mattr=+micromips < %s | FileCheck -check-prefix=PICR6-64 %s + +; A basic tail recursion test that also tests branch shortening. + +define i32 @k(i32 %a, i32 %b) { +entry: + %cmp = icmp eq i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.end: + %0 = add i32 %b, 10 + %1 = sub i32 %a, 1 +; STATICPRER6: j +; STATICPRER6MM: j +; PICPRER6: jr $25 +; PICPRER6MM: jrc $25 +; PICPRER6MM-64: jr $25 +; PICPRER6-64: jr $25 + +; STATICR6: j +; PICR6: jr $25 +; PICR6MM: jr $25 +; PICR6-64: jr $25 + %2 = tail call i32 @k(i32 %1, i32 %0) + ret i32 %2 + +if.then: +; STATICPRER6: jr $ra +; STATICPRER6MM: jrc $ra +; PICPRER6: jr $ra +; PICPRER6MM: jrc $ra +; PICPRER6MM-64: jr $ra +; PICPRER6-64: jr $ra + +; STATICR6: jr $ra +; PICR6: jr $ra +; PICR6MM: jr $ra +; PICR6-64: jr $ra + ret i32 %b + +}