diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -258,6 +258,11 @@
     cl::desc("Emit homogeneous prologue and epilogue for the size "
              "optimization (default = off)"));
 
+cl::opt<bool> CheckAuthenticatedLRByLoad(
+    "aarch64-check-authenticated-lr-by-load", cl::Hidden, cl::init(false),
+    cl::desc("When performing a tail call with authenticated LR, "
+             "use a load instruction to check the LR"));
+
 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
 
 /// Returns how much of the incoming argument stack area (in bytes) we should
@@ -269,14 +274,10 @@
 static int64_t getArgumentStackToRestore(MachineFunction &MF,
                                          MachineBasicBlock &MBB) {
   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
-  bool IsTailCallReturn = false;
-  if (MBB.end() != MBBI) {
-    unsigned RetOpcode = MBBI->getOpcode();
-    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
-                       RetOpcode == AArch64::TCRETURNri ||
-                       RetOpcode == AArch64::TCRETURNriBTI;
-  }
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  bool IsTailCallReturn = (MBB.end() != MBBI)
+                              ? AArch64InstrInfo::isTailCallReturnInst(*MBBI)
+                              : false;
 
   int64_t ArgumentPopSize = 0;
   if (IsTailCallReturn) {
@@ -1963,6 +1964,103 @@
   }
 }
 
+// Checks authenticated LR just before the TCRETURN* instruction.
+// This function may split MBB - in that case, the returned basic block
+// is the new block containing the return instruction.
+static MachineBasicBlock &checkAuthenticatedLRIfNeeded(MachineFunction &MF,
+                                                       MachineBasicBlock &MBB) {
+  const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
+  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  const auto *TII = Subtarget.getInstrInfo();
+  const auto *TRI = Subtarget.getRegisterInfo();
+
+  if (!MFnI.shouldSignReturnAddress(MF))
+    return MBB;
+
+  auto TI = MBB.getFirstTerminator();
+  if (TI == MBB.end())
+    return MBB;
+
+  // Only explicitly check LR if we are performing tail call.
+  if (!AArch64InstrInfo::isTailCallReturnInst(*TI))
+    return MBB;
+
+  Register TmpReg =
+      TI->readsRegister(AArch64::X16, TRI) ? AArch64::X17 : AArch64::X16;
+  assert(!TI->readsRegister(TmpReg, TRI) &&
+         "More than a single register is used by TCRETURN");
+
+  // The following code may create a signing oracle:
+  //
+  //   <authenticate LR>
+  //   TCRETURN ; the callee may sign and spill the LR in its prologue
+  //
+  // To avoid generating a signing oracle, check the authenticated value
+  // before possibly re-signing it in the callee, as follows:
+  //
+  //   <authenticate LR>
+  //   mov tmp, lr
+  //   xpaclri ; encoded as "hint #7"
+  //   ; Note: at this point, the LR register contains the return address as if
+  //   ; the authentication succeeded and the temporary register contains the
+  //   ; *real* result of authentication.
+  //   cmp tmp, lr
+  //   b.ne break_block
+  // ret_block:
+  //   TCRETURN
+  // break_block:
+  //   brk 0xc471
+  //
+  // or just
+  //
+  //   <authenticate LR>
+  //   ldr tmp, [lr]
+  //   TCRETURN
+
+  const BasicBlock *BB = MBB.getBasicBlock();
+  DebugLoc DL = TI->getDebugLoc();
+
+  if (CheckAuthenticatedLRByLoad) {
+    BuildMI(MBB, TI, DL, TII->get(AArch64::LDRXui), TmpReg)
+        .addReg(AArch64::LR)
+        .addImm(0)
+        .setMIFlags(MachineInstr::FrameDestroy);
+    return MBB;
+  }
+
+  // Auth instruction was previously added to this basic block.
+  assert(TI != MBB.begin() && "Non-terminator instructions expected");
+  auto &LastNonTerminator = *std::prev(TI);
+  MachineBasicBlock *RetBlock = MBB.splitAt(LastNonTerminator);
+
+  MachineBasicBlock *BreakBlock = MF.CreateMachineBasicBlock(BB);
+  MF.push_back(BreakBlock);
+  MBB.splitSuccessor(RetBlock, BreakBlock);
+
+  MachineBasicBlock *AuthBlock = &MBB;
+  BuildMI(AuthBlock, DL, TII->get(TargetOpcode::COPY), TmpReg)
+      .addReg(AArch64::LR)
+      .setMIFlags(MachineInstr::FrameDestroy);
+  BuildMI(AuthBlock, DL, TII->get(AArch64::XPACLRI))
+      .setMIFlags(MachineInstr::FrameDestroy);
+  BuildMI(AuthBlock, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
+      .addReg(TmpReg)
+      .addReg(AArch64::LR)
+      .addImm(0)
+      .setMIFlags(MachineInstr::FrameDestroy);
+  BuildMI(AuthBlock, DL, TII->get(AArch64::Bcc))
+      .addImm(AArch64CC::NE)
+      .addMBB(BreakBlock)
+      .setMIFlags(MachineInstr::FrameDestroy);
+  assert(AuthBlock->getFallThrough() == RetBlock);
+
+  BuildMI(BreakBlock, DL, TII->get(AArch64::BRK))
+      .addImm(0xc471)
+      .setMIFlags(MachineInstr::FrameDestroy);
+
+  return *RetBlock;
+}
+
 static bool isFuncletReturnInstr(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
   default:
@@ -1993,14 +2091,18 @@
   }
 
   auto FinishingTouches = make_scope_exit([&]() {
+    MachineBasicBlock *RetMBB = &MBB;
     if (AFI->shouldSignReturnAddress(MF))
       authenticateLR(MF, MBB, NeedsWinCFI, &HasWinCFI);
     if (needsShadowCallStackPrologueEpilogue(MF))
       emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
+    else
+      RetMBB = &checkAuthenticatedLRIfNeeded(MF, MBB);
+    // checkAuthenticatedLRIfNeeded() may have split MBB at this point.
     if (EmitCFI)
-      emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
+      emitCalleeSavedGPRRestores(*RetMBB, RetMBB->getFirstTerminator());
     if (HasWinCFI)
-      BuildMI(MBB, MBB.getFirstTerminator(), DL,
+      BuildMI(*RetMBB, RetMBB->getFirstTerminator(), DL,
               TII->get(AArch64::SEH_EpilogEnd))
           .setMIFlag(MachineInstr::FrameDestroy);
   });
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -126,6 +126,9 @@
   /// Return true if pairing the given load or store may be paired with another.
   static bool isPairableLdStInst(const MachineInstr &MI);
 
+  /// Returns true if MI is one of the TCRETURN* instructions.
+  static bool isTailCallReturnInst(const MachineInstr &MI);
+
   /// Return the opcode that set flags when possible. The caller is
   /// responsible for ensuring the opc has a flag setting equivalent.
   static unsigned convertToFlagSettingOpc(unsigned Opc);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2454,6 +2454,20 @@
   }
 }
 
+bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    assert((!MI.isCall() || !MI.isReturn()) &&
+           "Unexpected instruction - was a new tail call opcode introduced?");
+    return false;
+  case AArch64::TCRETURNdi:
+  case AArch64::TCRETURNri:
+  case AArch64::TCRETURNriBTI:
+  case AArch64::TCRETURNriALL:
+    return true;
+  }
+}
+
 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
   switch (Opc) {
   default:
diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
--- a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
+++ b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
@@ -67,7 +67,7 @@
 ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-V8A-NEXT: .cfi_offset w30, -16
 ; CHECK-V8A-NEXT: .cfi_remember_state
-; CHECK-V8A-NEXT: cbz w0, .LBB1_2
+; CHECK-V8A-NEXT: cbz w0, .LBB1_3
 ; CHECK-V8A-NEXT: // %bb.1: // %if.then
 ; CHECK-V8A-NEXT: mov w0, wzr
 ; CHECK-V8A-NEXT: bl _Z3bari
@@ -75,9 +75,14 @@
 ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 0
 ; CHECK-V8A-NEXT: hint #29
 ; CHECK-V8A-NEXT: .cfi_negate_ra_state
+; CHECK-V8A-NEXT: mov x16, x30
+; CHECK-V8A-NEXT: hint #7
+; CHECK-V8A-NEXT: cmp x16, x30
+; CHECK-V8A-NEXT: b.ne .LBB1_4
+; CHECK-V8A-NEXT: // %bb.2: // %if.then
 ; CHECK-V8A-NEXT: .cfi_restore w30
 ; CHECK-V8A-NEXT: b _Z3bari
-; CHECK-V8A-NEXT: .LBB1_2: // %if.else
+; CHECK-V8A-NEXT: .LBB1_3: // %if.else
 ; CHECK-V8A-NEXT: .cfi_restore_state
 ; CHECK-V8A-NEXT: bl _Z4quuxi
 ; CHECK-V8A-NEXT: add w0, w0, #1
@@ -87,6 +92,8 @@
 ; CHECK-V8A-NEXT: .cfi_negate_ra_state
 ; CHECK-V8A-NEXT: .cfi_restore w30
 ; CHECK-V8A-NEXT: ret
+; CHECK-V8A-NEXT: .LBB1_4: // %if.then
+; CHECK-V8A-NEXT: brk #0xc471
 ;
 ; CHECK-V83A-LABEL: baz_async:
 ; CHECK-V83A: // %bb.0: // %entry
@@ -96,7 +103,7 @@
 ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-V83A-NEXT: .cfi_offset w30, -16
 ; CHECK-V83A-NEXT: .cfi_remember_state
-; CHECK-V83A-NEXT: cbz w0, .LBB1_2
+; CHECK-V83A-NEXT: cbz w0, .LBB1_3
 ; CHECK-V83A-NEXT: // %bb.1: // %if.then
 ; CHECK-V83A-NEXT: mov w0, wzr
 ; CHECK-V83A-NEXT: bl _Z3bari
@@ -104,9 +111,14 @@
 ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 0
 ; CHECK-V83A-NEXT: autiasp
 ; CHECK-V83A-NEXT: .cfi_negate_ra_state
+; CHECK-V83A-NEXT: mov x16, x30
+; CHECK-V83A-NEXT: xpaclri
+; CHECK-V83A-NEXT: cmp x16, x30
+; CHECK-V83A-NEXT: b.ne .LBB1_4
+; CHECK-V83A-NEXT: // %bb.2: // %if.then
 ; CHECK-V83A-NEXT: .cfi_restore w30
 ; CHECK-V83A-NEXT: b _Z3bari
-; CHECK-V83A-NEXT: .LBB1_2: // %if.else
+; CHECK-V83A-NEXT: .LBB1_3: // %if.else
 ; CHECK-V83A-NEXT: .cfi_restore_state
 ; CHECK-V83A-NEXT: bl _Z4quuxi
 ; CHECK-V83A-NEXT: add w0, w0, #1
@@ -114,6 +126,8 @@
 ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 0
 ; CHECK-V83A-NEXT: .cfi_restore w30
 ; CHECK-V83A-NEXT: retaa
+; CHECK-V83A-NEXT: .LBB1_4: // %if.then
+; CHECK-V83A-NEXT: brk #0xc471
 entry:
   %tobool.not = icmp eq i32 %a, 0
   br i1 %tobool.not, label %if.else, label %if.then
@@ -145,19 +159,26 @@
 ; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-V8A-NEXT: .cfi_offset w30, -16
-; CHECK-V8A-NEXT: cbz w0, .LBB2_2
+; CHECK-V8A-NEXT: cbz w0, .LBB2_3
 ; CHECK-V8A-NEXT: // %bb.1: // %if.then
 ; CHECK-V8A-NEXT: mov w0, wzr
 ; CHECK-V8A-NEXT: bl _Z3bari
 ; CHECK-V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-V8A-NEXT: hint #29
+; CHECK-V8A-NEXT: mov x16, x30
+; CHECK-V8A-NEXT: hint #7
+; CHECK-V8A-NEXT: cmp x16, x30
+; CHECK-V8A-NEXT: b.ne .LBB2_4
+; CHECK-V8A-NEXT: // %bb.2: // %if.then
 ; CHECK-V8A-NEXT: b _Z3bari
-; CHECK-V8A-NEXT: .LBB2_2: // %if.else
+; CHECK-V8A-NEXT: .LBB2_3: // %if.else
 ; CHECK-V8A-NEXT: bl _Z4quuxi
 ; CHECK-V8A-NEXT: add w0, w0, #1
 ; CHECK-V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-V8A-NEXT: hint #29
 ; CHECK-V8A-NEXT: ret
+; CHECK-V8A-NEXT: .LBB2_4: // %if.then
+; CHECK-V8A-NEXT: brk #0xc471
 ;
 ; CHECK-V83A-LABEL: baz_sync:
 ; CHECK-V83A: // %bb.0: // %entry
@@ -166,18 +187,25 @@
 ; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-V83A-NEXT: .cfi_offset w30, -16
-; CHECK-V83A-NEXT: cbz w0, .LBB2_2
+; CHECK-V83A-NEXT: cbz w0, .LBB2_3
 ; CHECK-V83A-NEXT: // %bb.1: // %if.then
 ; CHECK-V83A-NEXT: mov w0, wzr
 ; CHECK-V83A-NEXT: bl _Z3bari
 ; CHECK-V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-V83A-NEXT: autiasp
+; CHECK-V83A-NEXT: mov x16, x30
+; CHECK-V83A-NEXT: xpaclri
+; CHECK-V83A-NEXT: cmp x16, x30
+; CHECK-V83A-NEXT: b.ne .LBB2_4
+; CHECK-V83A-NEXT: // %bb.2: // %if.then
 ; CHECK-V83A-NEXT: b _Z3bari
-; CHECK-V83A-NEXT: .LBB2_2: // %if.else
+; CHECK-V83A-NEXT: .LBB2_3: // %if.else
 ; CHECK-V83A-NEXT: bl _Z4quuxi
 ; CHECK-V83A-NEXT: add w0, w0, #1
 ; CHECK-V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-V83A-NEXT: retaa
+; CHECK-V83A-NEXT: .LBB2_4: // %if.then
+; CHECK-V83A-NEXT: brk #0xc471
 entry:
   %tobool.not = icmp eq i32 %a, 0
   br i1 %tobool.not, label %if.else, label %if.then
diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-tailcall.ll b/llvm/test/CodeGen/AArch64/sign-return-address-tailcall.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sign-return-address-tailcall.ll
@@ -0,0 +1,126 @@
+; RUN: llc -mtriple=aarch64 -asm-verbose=0 -aarch64-check-authenticated-lr-by-load < %s | FileCheck -DAUTIASP="hint #29" --check-prefixes=COMMON,LDR %s
+; RUN: llc -mtriple=aarch64 -asm-verbose=0 -aarch64-check-authenticated-lr-by-load -mattr=v8.3a < %s | FileCheck -DAUTIASP="autiasp" --check-prefixes=COMMON,LDR %s
+; RUN: llc -mtriple=aarch64 -asm-verbose=0 < %s | FileCheck -DAUTIASP="hint #29" -DXPACLRI="hint #7" --check-prefixes=COMMON,XPAC %s
+; RUN: llc -mtriple=aarch64 -asm-verbose=0 -mattr=v8.3a < %s | FileCheck -DAUTIASP="autiasp" -DXPACLRI="xpaclri" --check-prefixes=COMMON,XPAC %s
+
+define i32 @tailcall_direct() "sign-return-address"="non-leaf" {
+; COMMON-LABEL: tailcall_direct:
+; COMMON: str x30, [sp, #-16]!
+; COMMON: ldr x30, [sp], #16
+;
+; LDR-NEXT: [[AUTIASP]]
+; LDR-NEXT: ldr x16, [x30]
+; LDR-NEXT: b callee
+;
+; XPAC-NEXT: [[AUTIASP]]
+; XPAC-NEXT: mov x16, x30
+; XPAC-NEXT: [[XPACLRI]]
+; XPAC-NEXT: cmp x16, x30
+; XPAC-NEXT: b.ne .[[FAIL:LBB[_0-9]+]]
+; XPAC-NEXT: b callee
+; XPAC-NEXT: .[[FAIL]]:
+; XPAC-NEXT: brk #0xc471
+  tail call void asm sideeffect "", "~{lr}"()
+  %call = tail call i32 @callee()
+  ret i32 %call
+}
+
+define i32 @tailcall_indirect(ptr %fptr) "sign-return-address"="non-leaf" {
+; COMMON-LABEL: tailcall_indirect:
+; COMMON: str x30, [sp, #-16]!
+; COMMON: ldr x30, [sp], #16
+;
+; LDR-NEXT: [[AUTIASP]]
+; LDR-NEXT: ldr x16, [x30]
+; LDR-NEXT: br x0
+;
+; XPAC-NEXT: [[AUTIASP]]
+; XPAC-NEXT: mov x16, x30
+; XPAC-NEXT: [[XPACLRI]]
+; XPAC-NEXT: cmp x16, x30
+; XPAC-NEXT: b.ne .[[FAIL:LBB[_0-9]+]]
+; XPAC-NEXT: br x0
+; XPAC-NEXT: .[[FAIL]]:
+; XPAC-NEXT: brk #0xc471
+  tail call void asm sideeffect "", "~{lr}"()
+  %call = tail call i32 %fptr()
+  ret i32 %call
+}
+
+define i32 @tailcall_direct_noframe() "sign-return-address"="non-leaf" {
+; COMMON-LABEL: tailcall_direct_noframe:
+; COMMON-NEXT: .cfi_startproc
+; COMMON-NEXT: b callee
+  %call = tail call i32 @callee()
+  ret i32 %call
+}
+
+define i32 @tailcall_indirect_noframe(ptr %fptr) "sign-return-address"="non-leaf" {
+; COMMON-LABEL: tailcall_indirect_noframe:
+; COMMON-NEXT: .cfi_startproc
+; COMMON-NEXT: br x0
+  %call = tail call i32 %fptr()
+  ret i32 %call
+}
+
+define i32 @tailcall_direct_noframe_sign_all() "sign-return-address"="all" {
+; COMMON-LABEL: tailcall_direct_noframe_sign_all:
+; COMMON-NOT: str{{.*}}x30
+; COMMON-NOT: ldr{{.*}}x30
+;
+; LDR: [[AUTIASP]]
+; LDR-NEXT: ldr x16, [x30]
+; LDR-NEXT: b callee
+;
+; XPAC: [[AUTIASP]]
+; XPAC-NEXT: mov x16, x30
+; XPAC-NEXT: [[XPACLRI]]
+; XPAC-NEXT: cmp x16, x30
+; XPAC-NEXT: b.ne .[[FAIL:LBB[_0-9]+]]
+; XPAC-NEXT: b callee
+; XPAC-NEXT: .[[FAIL]]:
+; XPAC-NEXT: brk #0xc471
+  %call = tail call i32 @callee()
+  ret i32 %call
+}
+
+define i32 @tailcall_indirect_noframe_sign_all(ptr %fptr) "sign-return-address"="all" {
+; COMMON-LABEL: tailcall_indirect_noframe_sign_all:
+; COMMON-NOT: str{{.*}}x30
+; COMMON-NOT: ldr{{.*}}x30
+;
+; LDR: [[AUTIASP]]
+; LDR-NEXT: ldr x16, [x30]
+; LDR-NEXT: br x0
+;
+; XPAC: [[AUTIASP]]
+; XPAC-NEXT: mov x16, x30
+; XPAC-NEXT: [[XPACLRI]]
+; XPAC-NEXT: cmp x16, x30
+; XPAC-NEXT: b.ne .[[FAIL:LBB[_0-9]+]]
+; XPAC-NEXT: br x0
+; XPAC-NEXT: .[[FAIL]]:
+; XPAC-NEXT: brk #0xc471
+  %call = tail call i32 %fptr()
+  ret i32 %call
+}
+
+; Do not emit any LR checks when Shadow Call Stack is enabled
+define i32 @tailcall_scs(ptr %fptr) "sign-return-address"="all" shadowcallstack "target-features"="+reserve-x18" {
+; COMMON-LABEL: tailcall_scs:
+; COMMON: str x30, [sp, #-16]!
+; COMMON: ldr x30, [sp], #16
+;
+; COMMON-NOT: ldr {{.*}}, [x30]
+; COMMON-NOT: xpac
+; COMMON-NOT: hint #7
+; COMMON-NOT: brk
+;
+; Match the end of function:
+; COMMON: .size tailcall_scs,
+  tail call void asm sideeffect "", "~{lr}"()
+  %call = tail call i32 %fptr()
+  ret i32 %call
+}
+
+declare i32 @callee()
diff --git a/llvm/test/CodeGen/AArch64/sign-return-address.ll b/llvm/test/CodeGen/AArch64/sign-return-address.ll
--- a/llvm/test/CodeGen/AArch64/sign-return-address.ll
+++ b/llvm/test/CodeGen/AArch64/sign-return-address.ll
@@ -170,7 +170,14 @@
 ; COMPAT-NEXT: //NO_APP
 ; COMPAT-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; COMPAT-NEXT: hint #29
+; COMPAT-NEXT: mov x16, x30
+; COMPAT-NEXT: hint #7
+; COMPAT-NEXT: cmp x16, x30
+; COMPAT-NEXT: b.ne .LBB9_2
+; COMPAT-NEXT: // %bb.1:
 ; COMPAT-NEXT: b bar
+; COMPAT-NEXT: .LBB9_2:
+; COMPAT-NEXT: brk #0xc471
 ;
 ; V83A-LABEL: spill_lr_and_tail_call:
 ; V83A: // %bb.0:
@@ -184,7 +191,14 @@
 ; V83A-NEXT: //NO_APP
 ; V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; V83A-NEXT: autiasp
+; V83A-NEXT: mov x16, x30
+; V83A-NEXT: xpaclri
+; V83A-NEXT: cmp x16, x30
+; V83A-NEXT: b.ne .LBB9_2
+; V83A-NEXT: // %bb.1:
 ; V83A-NEXT: b bar
+; V83A-NEXT: .LBB9_2:
+; V83A-NEXT: brk #0xc471
   call void asm sideeffect "mov x30, $0", "r,~{lr}"(i64 %x) #1
   tail call fastcc i64 @bar(i64 %x)
   ret void