diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -175,6 +175,7 @@ kw_amdgpu_kernel, kw_amdgpu_gfx, kw_tailcc, + kw_m68k_rtdcc, // Attributes: kw_attributes, diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -245,6 +245,9 @@ /// placement. Preserves active lane values for input VGPRs. AMDGPU_CS_ChainPreserve = 105, + /// Used for M68k rtd-based CC (similar to X86's stdcall). + M68k_RTD = 106, + /// The highest possible ID. Must be some 2^k - 1. MaxID = 1023 }; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -632,6 +632,7 @@ KEYWORD(amdgpu_kernel); KEYWORD(amdgpu_gfx); KEYWORD(tailcc); + KEYWORD(m68k_rtdcc); KEYWORD(cc); KEYWORD(c); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1999,6 +1999,7 @@ /// ::= 'amdgpu_cs_chain_preserve' /// ::= 'amdgpu_kernel' /// ::= 'tailcc' +/// ::= 'm68k_rtdcc' /// ::= 'cc' UINT /// bool LLParser::parseOptionalCallingConv(unsigned &CC) { @@ -2067,6 +2068,7 @@ break; case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break; case lltok::kw_tailcc: CC = CallingConv::Tail; break; + case lltok::kw_m68k_rtdcc: CC = CallingConv::M68k_RTD; break; case lltok::kw_cc: { Lex.Lex(); return parseUInt32(CC); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -350,6 +350,7 @@ break; case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break; case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break; + case CallingConv::M68k_RTD: Out << "m68k_rtdcc"; break; } } diff --git 
a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp --- a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp +++ b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp @@ -258,32 +258,22 @@ if (StackAdj == 0) { MIB = BuildMI(MBB, MBBI, DL, TII->get(M68k::RTS)); - } else if (isUInt<16>(StackAdj)) { - - if (STI->atLeastM68020()) { - llvm_unreachable("RTD is not implemented"); - } else { - // Copy PC from stack to a free address(A0 or A1) register - // TODO check if pseudo expand uses free address register - BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32aj), M68k::A1) - .addReg(M68k::SP); + } else { + // Copy return address from stack to a free address register (A0 or A1). + // TODO: verify A1 is free here, and use RTD on M68020+ when possible. + BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32aj), M68k::A1) + .addReg(M68k::SP); - // Adjust SP - FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true); + // Adjust SP + FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true); - // Put the return address on stack - BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32ja)) - .addReg(M68k::SP) - .addReg(M68k::A1); + // Put the return address on stack + BuildMI(MBB, MBBI, DL, TII->get(M68k::MOV32ja)) + .addReg(M68k::SP) + .addReg(M68k::A1); - // RTS - BuildMI(MBB, MBBI, DL, TII->get(M68k::RTS)); - } - } else { - // TODO: RTD can only handle immediates as big as 2**16-1. - // If we need to pop off bytes before the return address, we - // must do it manually. - llvm_unreachable("Stack adjustment size not supported"); + // RTS + BuildMI(MBB, MBBI, DL, TII->get(M68k::RTS)); } // FIXME: Can rest of the operands be ignored, if there is any? diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp --- a/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -3050,9 +3050,8 @@ /// Determines whether the callee is required to pop its own arguments. /// Callee pop is necessary to support tail calls. 
-bool M68k::isCalleePop(CallingConv::ID CallingConv, bool IsVarArg, - bool GuaranteeTCO) { - return false; +bool M68k::isCalleePop(CallingConv::ID CC, bool IsVarArg, bool GuaranteeTCO) { + return CC == CallingConv::M68k_RTD && !IsVarArg; } // Return true if it is OK for this CMOV pseudo-opcode to be cascaded diff --git a/llvm/test/CodeGen/M68k/CConv/rtd-call.ll b/llvm/test/CodeGen/M68k/CConv/rtd-call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/M68k/CConv/rtd-call.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=m68k %s -stop-after=finalize-isel -o - | FileCheck %s + +; We want to make sure the caller doesn't pop the stack for callees using +; the M68k_RTD CC. However, we've implemented some frame optimization +; techniques that eliminate as many frame setup/destroy instructions as +; possible. Therefore, to keep the test case small and concise, we check +; the MIR generated after ISel instead. + +declare dso_local m68k_rtdcc void @callee(i32 noundef) +declare dso_local m68k_rtdcc void @va_callee(i32 noundef, ...) 
+ +define dso_local i32 @caller(ptr noundef %y) { + ; CHECK-LABEL: name: caller + ; CHECK: bb.0.entry: + ; CHECK-NEXT: [[MOV32rp:%[0-9]+]]:ar32 = MOV32rp 0, %fixed-stack.0, implicit-def dead $ccr :: (load (s32) from %fixed-stack.0, align 8) + ; CHECK-NEXT: [[MOV32rj:%[0-9]+]]:xr32 = MOV32rj killed [[MOV32rp]], implicit-def dead $ccr :: (load (s32) from %ir.y) + ; CHECK-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def dead $sp, implicit-def dead $ccr, implicit $sp + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ar32 = COPY $sp + ; CHECK-NEXT: MOV32jr [[COPY]], [[MOV32rj]], implicit-def dead $ccr :: (store (s32) into stack, align 2) + ; CHECK-NEXT: CALLb @callee, csr_std, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 4, 4, implicit-def dead $sp, implicit-def dead $ccr, implicit $sp + ; CHECK-NEXT: $d0 = COPY [[MOV32rj]] + ; CHECK-NEXT: RET 0, $d0 +entry: + %0 = load i32, ptr %y, align 4 + call m68k_rtdcc void @callee(i32 noundef %0) + ret i32 %0 +} + +define dso_local i32 @va_caller(ptr noundef %y) { + ; CHECK-LABEL: name: va_caller + ; CHECK: bb.0.entry: + ; CHECK-NEXT: [[MOV32rp:%[0-9]+]]:ar32 = MOV32rp 0, %fixed-stack.0, implicit-def dead $ccr :: (load (s32) from %fixed-stack.0, align 8) + ; CHECK-NEXT: [[MOV32rj:%[0-9]+]]:xr32 = MOV32rj killed [[MOV32rp]], implicit-def dead $ccr :: (load (s32) from %ir.y) + ; CHECK-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def dead $sp, implicit-def dead $ccr, implicit $sp + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ar32 = COPY $sp + ; CHECK-NEXT: MOV32jr [[COPY]], [[MOV32rj]], implicit-def dead $ccr :: (store (s32) into stack, align 2) + ; CHECK-NEXT: CALLb @va_callee, csr_std, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 4, 0, implicit-def dead $sp, implicit-def dead $ccr, implicit $sp + ; CHECK-NEXT: $d0 = COPY [[MOV32rj]] + ; CHECK-NEXT: RET 0, $d0 +entry: + %0 = load i32, ptr %y, align 4 + call m68k_rtdcc void (i32, ...) 
@va_callee(i32 noundef %0) + ret i32 %0 +} + diff --git a/llvm/test/CodeGen/M68k/CConv/rtd-ret.ll b/llvm/test/CodeGen/M68k/CConv/rtd-ret.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/M68k/CConv/rtd-ret.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=m68k < %s | FileCheck %s + +define dso_local m68k_rtdcc i32 @ret(i32 noundef %a, i32 noundef %b, i32 noundef %c) nounwind { +; CHECK-LABEL: ret: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: move.l (8,%sp), %d0 +; CHECK-NEXT: add.l (4,%sp), %d0 +; CHECK-NEXT: add.l (12,%sp), %d0 +; CHECK-NEXT: move.l (%sp), %a1 +; CHECK-NEXT: adda.l #12, %sp +; CHECK-NEXT: move.l %a1, (%sp) +; CHECK-NEXT: rts +entry: + %add = add nsw i32 %b, %a + %add1 = add nsw i32 %add, %c + ret i32 %add1 +} + +define dso_local m68k_rtdcc i32 @va_ret(i32 noundef %a, i32 noundef %b, i32 noundef %c, ...) nounwind { +; CHECK-LABEL: va_ret: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: move.l (8,%sp), %d0 +; CHECK-NEXT: add.l (4,%sp), %d0 +; CHECK-NEXT: add.l (12,%sp), %d0 +; CHECK-NEXT: rts +entry: + %add = add nsw i32 %b, %a + %add1 = add nsw i32 %add, %c + ret i32 %add1 +}