Index: lib/Target/AArch64/AArch64AsmPrinter.cpp
===================================================================
--- lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -591,6 +591,7 @@
   // attributes (isCall, isReturn, etc.). We lower them to the real
   // instruction here.
   case AArch64::TCRETURNri:
+  case AArch64::TCRETURNriBTI:
   case AArch64::TCRETURNriALL: {
     MCInst TmpInst;
     TmpInst.setOpcode(AArch64::BR);
Index: lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64FrameLowering.cpp
+++ lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -927,7 +927,8 @@
     DL = MBBI->getDebugLoc();
     unsigned RetOpcode = MBBI->getOpcode();
     IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
-                       RetOpcode == AArch64::TCRETURNri;
+                       RetOpcode == AArch64::TCRETURNri ||
+                       RetOpcode == AArch64::TCRETURNriBTI;
   }
   int NumBytes = MFI.getStackSize();
   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -360,6 +360,9 @@
   def NotForCodeSize : Predicate<"!MF->getFunction().optForSize()">;
   // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
   def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">;
+
+  def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
+  def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
 }
 
 include "AArch64InstrFormats.td"
@@ -6641,10 +6644,18 @@
   // some verifier checks for outlined functions.
   def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                       Sched<[WriteBrReg]>;
+  // Indirect tail-call limited to only use registers (x16 and x17) which are
+  // allowed to tail-call a "BTI c" instruction.
+  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
+                      Sched<[WriteBrReg]>;
 }
 
 def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
-          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>;
+          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
+      Requires<[NotUseBTI]>;
+def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
+          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
+      Requires<[UseBTI]>;
 def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
           (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
 def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
Index: lib/Target/AArch64/AArch64RegisterInfo.td
===================================================================
--- lib/Target/AArch64/AArch64RegisterInfo.td
+++ lib/Target/AArch64/AArch64RegisterInfo.td
@@ -200,6 +200,12 @@
                                     X22, X23, X24, X25, X26, X27, X28,
                                     FP, LR)>;
 
+// Restricted set of tail call registers, for use when branch target
+// enforcement is enabled. These are the only registers which can be used to
+// indirectly branch (not call) to the "BTI c" instruction at the start of a
+// BTI-protected function.
+def rtcGPR64 : RegisterClass<"AArch64", [i64], 64, (add X16, X17)>;
+
 // GPR register classes for post increment amount of vector load/store that
 // has alternate printing when Rm=31 and prints a constant immediate value
 // equal to the total number of bytes transferred.
Index: test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/branch-target-enforcement-indirect-calls.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple aarch64--none-eabi -mattr=+bti < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-arm-none-eabi"
+
+; When BTI is enabled, all indirect tail-calls must use x16 or x17 (the intra
+; procedure call scratch registers) to hold the address, as these instructions
+; are allowed to target the "BTI c" instruction at the start of the target
+; function. The alternative to this would be to start functions with "BTI jc",
+; which increases the number of potential ways they could be called, and
+; weakens the security protections.
+
+define void @bti_disabled(void ()* %p) {
+entry:
+  tail call void %p()
+; CHECK: br x0
+  ret void
+}
+
+define void @bti_enabled(void ()* %p) "branch-target-enforcement" {
+entry:
+  tail call void %p()
+; CHECK: br {{x16|x17}}
+  ret void
+}
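For reference, a rough sketch of the tail-call sequences the new test expects (illustrative only, not part of the patch: the CHECK lines above only pin the BR operand; the copy into x16/x17, the choice between those two registers, and any surrounding instructions such as the "BTI c" landing pad itself are left to the compiler and omitted here):

    bti_disabled:                  // no attribute: any tcGPR64 register may hold the target
            br      x0
    bti_enabled:                   // "branch-target-enforcement": target is first copied into x16/x17
            mov     x16, x0
            br      x16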