Index: llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -590,7 +590,8 @@ // Tail calls use pseudo instructions so they have the proper code-gen // attributes (isCall, isReturn, etc.). We lower them to the real // instruction here. - case AArch64::TCRETURNri: { + case AArch64::TCRETURNri: + case AArch64::TCRETURNriALL: { MCInst TmpInst; TmpInst.setOpcode(AArch64::BR); TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); Index: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5461,7 +5461,7 @@ TailOpcode = AArch64::TCRETURNdi; } else { assert(Call->getOpcode() == AArch64::BLR); - TailOpcode = AArch64::TCRETURNri; + TailOpcode = AArch64::TCRETURNriALL; } MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode)) .add(Call->getOperand(0)) Index: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td @@ -6635,6 +6635,12 @@ Sched<[WriteBrReg]>; def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>, Sched<[WriteBrReg]>; + // Indirect tail-call with any register allowed, used by MachineOutliner when + // this is proven safe. + // FIXME: If we have to add any more hacks like this, we should instead relax + // some verifier checks for outlined functions. + def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>, + Sched<[WriteBrReg]>; } def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), Index: llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll +++ llvm/trunk/test/CodeGen/AArch64/machine-outliner-thunk.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: bl [[OUTLINED_DIRECT:OUTLINED_FUNCTION_[0-9]+]] ; CHECK-NEXT: add w0, w0, #8 // =8 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -28,7 +28,7 @@ ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: bl [[OUTLINED_DIRECT]] ; CHECK-NEXT: add w0, w0, #88 // =88 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -38,7 +38,48 @@ ret i32 %cx } -; CHECK-LABEL: OUTLINED_FUNCTION_0: +define hidden i32 @c(i32 (i32, i32, i32, i32)* %fptr) { +; CHECK-LABEL: c: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl [[OUTLINED_INDIRECT:OUTLINED_FUNCTION_[0-9]+]] +; CHECK-NEXT: add w0, w0, #8 // =8 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) + %add = add nsw i32 %call, 8 + ret i32 %add +} + +define hidden i32 @d(i32 (i32, i32, i32, i32)* %fptr) { +; CHECK-LABEL: d: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl [[OUTLINED_INDIRECT]] +; CHECK-NEXT: add w0, w0, #88 // =88 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) + %add = add nsw i32 %call, 88 + ret i32 %add +} + +; CHECK: [[OUTLINED_INDIRECT]]: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: orr w0, wzr, #0x1 +; CHECK-NEXT: orr w1, wzr, #0x2 +; CHECK-NEXT: orr w2, wzr, #0x3 +; CHECK-NEXT: orr w3, wzr, #0x4 +; CHECK-NEXT: br x8 + +; CHECK: [[OUTLINED_DIRECT]]: ; CHECK: // %bb.0: ; CHECK-NEXT: orr w0, wzr, #0x1 ; CHECK-NEXT: orr w1, wzr, #0x2