diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst
--- a/clang/docs/ClangCommandLineReference.rst
+++ b/clang/docs/ClangCommandLineReference.rst
@@ -3329,7 +3329,7 @@
 
 .. option:: -mno-bti-at-return-twice
 
-Do not add a BTI instruction after a setjmp or other return-twice construct (Arm only)
+Do not add a BTI instruction after a setjmp or other return-twice construct (Arm/AArch64 only)
 
 .. option:: -mno-movt
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -193,6 +193,11 @@ Arm and AArch64 Support in Clang
 --------------------------------
 
+- When using ``-mbranch-protection=bti`` with AArch64, calls to ``setjmp``
+  will now be followed by a BTI instruction. This is done to be compatible
+  with setjmp implementations that return with a ``br`` instead of a ``ret``.
+  You can disable this behaviour using the ``-mno-bti-at-return-twice`` option.
+
 Floating Point Support in Clang
 -------------------------------
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3414,7 +3414,7 @@
 def mno_bti_at_return_twice : Flag<["-"], "mno-bti-at-return-twice">,
   Group<m_arm_Features_Group>,
   HelpText<"Do not add a BTI instruction after a setjmp or other"
-           " return-twice construct (Arm only)">;
+           " return-twice construct (Arm/AArch64 only)">;
 
 foreach i = {1-31} in
   def ffixed_x#i : Flag<["-"], "ffixed-x"#i>, Group<m_Group>,
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -588,4 +588,7 @@
     // Enabled A53 errata (835769) workaround by default on android
     Features.push_back("+fix-cortex-a53-835769");
   }
+
+  if (Args.getLastArg(options::OPT_mno_bti_at_return_twice))
+    Features.push_back("+no-bti-at-return-twice");
 }
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -466,6 +466,11 @@
 def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769",
   "FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">;
 
+def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice",
+                                                 "NoBTIAtReturnTwice", "true",
+                                                 "Don't place a BTI instruction "
+                                                 "after a return-twice">;
+
 //===----------------------------------------------------------------------===//
 // Architectures.
 //
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -86,6 +86,7 @@
                              unsigned N);
   bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
+  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
 };
@@ -759,6 +760,37 @@
   return true;
 }
 
+bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MBBI) {
+  // Expand CALL_BTI pseudo to:
+  // - a branch to the call target
+  // - a BTI instruction
+  // Mark the sequence as a bundle, to avoid passes moving other code in
+  // between.
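+  // (In particular, the machine outliner must not separate the call from
+  // the BTI that follows it; setjmp-bti-outliner.ll checks this.)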
+
+  MachineInstr &MI = *MBBI;
+  MachineOperand &CallTarget = MI.getOperand(0);
+  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
+         "invalid operand for regular call");
+  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
+  MachineInstr *Call =
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
+  Call->addOperand(CallTarget);
+
+  MachineInstr *BTI =
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
+          // BTI J so that setjmp can BR to this.
+          .addImm(36)
+          .getInstr();
+
+  if (MI.shouldUpdateCallSiteInfo())
+    MBB.getParent()->moveCallSiteInfo(&MI, Call);
+
+  MI.eraseFromParent();
+  finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
+  return true;
+}
+
 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
   Register CtxReg = MBBI->getOperand(0).getReg();
@@ -1238,6 +1270,8 @@
     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
   case AArch64::BLR_RVMARKER:
     return expandCALL_RVMARKER(MBB, MBBI);
+  case AArch64::BLR_BTI:
+    return expandCALL_BTI(MBB, MBBI);
   case AArch64::StoreSwiftAsyncContext:
     return expandStoreSwiftAsyncContext(MBB, MBBI);
   }
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -14,6 +14,7 @@
 
 #include "AArch64.h"
 #include "AArch64CallingConvention.h"
+#include "AArch64MachineFunctionInfo.h"
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
@@ -3127,6 +3128,13 @@
   if (!Callee && !Symbol)
     return false;
 
+  // Allow SelectionDAG isel to handle calls to functions like setjmp that need
+  // a BTI instruction following the call.
+  if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
+      !Subtarget->noBTIAtReturnTwice() &&
+      MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
+    return false;
+
   // Allow SelectionDAG isel to handle tail calls.
   if (IsTailCall)
     return false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -55,6 +55,8 @@
   // x29, x29` marker instruction.
   CALL_RVMARKER,
 
+  CALL_BTI, // Function call followed by a BTI instruction.
+
   // Produces the full sequence of instructions for getting the thread pointer
   // offset of a variable into X0, using the TLSDesc model.
   TLSDESC_CALLSEQ,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2274,6 +2274,7 @@
     MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
     MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
     MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
+    MAKE_CASE(AArch64ISD::CALL_BTI)
   }
 #undef MAKE_CASE
   return nullptr;
@@ -6178,6 +6179,12 @@
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
   bool IsSibCall = false;
+  bool GuardWithBTI = false;
+
+  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+      !Subtarget->noBTIAtReturnTwice()) {
+    GuardWithBTI = FuncInfo->branchTargetEnforcement();
+  }
 
   // Check callee args/returns for SVE registers and set calling convention
   // accordingly.
@@ -6612,7 +6619,8 @@
     Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
     auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
     Ops.insert(Ops.begin() + 1, GA);
-  }
+  } else if (GuardWithBTI)
+    CallOpc = AArch64ISD::CALL_BTI;
 
   // Returns a chain and a flag for retval copy to use.
   Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -473,6 +473,11 @@
                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                               SDNPVariadic]>;
 
+def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI",
+                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
+                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                              SDNPVariadic]>;
+
 def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
                              SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
@@ -2328,6 +2333,8 @@
                       PseudoInstExpansion<(BLR GPR64:$Rn)>;
   def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
                      Sched<[WriteBrReg]>;
+  def BLR_BTI : Pseudo<(outs), (ins GPR64:$Rn), []>,
+                Sched<[WriteBrReg]>;
 } // isCall
 
 def : Pat<(AArch64call GPR64:$Rn),
@@ -2341,6 +2348,9 @@
           (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
           Requires<[NoSLSBLRMitigation]>;
 
+def : Pat<(AArch64call_bti GPR64:$Rn),
+          (BLR_BTI GPR64:$Rn)>;
+
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
 def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
 } // isBranch, isTerminator, isBarrier, isIndirectBranch
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1129,12 +1129,20 @@
 
   // Create a temporarily-floating call instruction so we can add the implicit
   // uses of arg registers.
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  unsigned Opc = 0;
   // Calls with operand bundle "clang.arc.attachedcall" are special. They should
   // be expanded to the call, directly followed by a special marker sequence and
   // a call to an ObjC library function.
-  unsigned Opc = 0;
   if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
     Opc = AArch64::BLR_RVMARKER;
+  // A call to a returns_twice function like setjmp must be followed by a BTI
+  // instruction.
+  else if (Info.CB &&
+           Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+           !Subtarget.noBTIAtReturnTwice() &&
+           MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
+    Opc = AArch64::BLR_BTI;
   else
     Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
 
@@ -1153,7 +1161,6 @@
   // Tell the call which registers are clobbered.
   const uint32_t *Mask;
-  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   const auto *TRI = Subtarget.getRegisterInfo();
 
   AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
diff --git a/llvm/test/CodeGen/AArch64/setjmp-bti-no-enforcement.ll b/llvm/test/CodeGen/AArch64/setjmp-bti-no-enforcement.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/setjmp-bti-no-enforcement.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel < %s | FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel < %s | FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+
+; Same as setjmp-bti.ll except that we do not enable branch target enforcement
+; for this module. No combination of options should lead to a BTI being emitted.
+
+; C source
+; --------
+; extern int setjmp(void*);
+; extern void notsetjmp(void);
+;
+; void bbb(void) {
+;   setjmp(0);
+;   int (*fnptr)(void*) = setjmp;
+;   fnptr(0);
+;   notsetjmp();
+; }
+
+define void @bbb() {
+; NOBTI-LABEL: bbb:
+; NOBTI: bl setjmp
+; NOBTI-NOT: hint #36
+; NOBTI: blr x{{[0-9]+}}
+; NOBTI-NOT: hint #36
+; NOBTI: bl notsetjmp
+; NOBTI-NOT: hint #36
+
+entry:
+  %fnptr = alloca i32 (i8*)*, align 8
+  %call = call i32 @setjmp(i8* noundef null) #0
+  store i32 (i8*)* @setjmp, i32 (i8*)** %fnptr, align 8
+  %0 = load i32 (i8*)*, i32 (i8*)** %fnptr, align 8
+  %call1 = call i32 %0(i8* noundef null) #0
+  call void @notsetjmp()
+  ret void
+}
+
+declare i32 @setjmp(i8* noundef) #0
+declare void @notsetjmp()
+
+attributes #0 = { returns_twice }
+
+; !llvm.module.flags = !{!0}
+; !0 = !{i32 1, !"branch-target-enforcement", i32 1}
diff --git a/llvm/test/CodeGen/AArch64/setjmp-bti-outliner.ll b/llvm/test/CodeGen/AArch64/setjmp-bti-outliner.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/setjmp-bti-outliner.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -enable-machine-outliner < %s | FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -enable-machine-outliner < %s | \
+; RUN: FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel -enable-machine-outliner < %s | \
+; RUN: FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -enable-machine-outliner -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -enable-machine-outliner -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel -enable-machine-outliner -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+
+; Check that the outliner does not split up the call to setjmp and the BTI
+; after it. When no BTI is inserted, the outliner is free to move the setjmp
+; call into an outlined function.
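+; The call and its BTI stay together because expandCALL_BTI in
+; AArch64ExpandPseudoInsts.cpp emits them as a single bundle.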
+
+; C source
+; --------
+; extern int setjmp(void*);
+;
+; int f(int a, int b, int c, int d) {
+;   setjmp(0);
+;   return 1 + a * (a + b) / (c + d);
+; }
+;
+; int g(int a, int b, int c, int d) {
+;   setjmp(0);
+;   return 2 + a * (a + b) / (c + d);
+; }
+
+define i32 @f(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) {
+; BTI-LABEL: f:
+; BTI: bl OUTLINED_FUNCTION_1
+; BTI-NEXT: bl setjmp
+; BTI-NEXT: hint #36
+; BTI-NEXT: bl OUTLINED_FUNCTION_0
+
+; NOBTI-LABEL: f:
+; NOBTI: bl OUTLINED_FUNCTION_0
+; NOBTI-NEXT: bl OUTLINED_FUNCTION_1
+
+entry:
+  %call = call i32 @setjmp(i8* noundef null) #0
+  %add = add nsw i32 %b, %a
+  %mul = mul nsw i32 %add, %a
+  %add1 = add nsw i32 %d, %c
+  %div = sdiv i32 %mul, %add1
+  %add2 = add nsw i32 %div, 1
+  ret i32 %add2
+}
+
+declare i32 @setjmp(i8* noundef) #0
+
+define i32 @g(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) {
+; BTI-LABEL: g:
+; BTI: bl OUTLINED_FUNCTION_1
+; BTI-NEXT: bl setjmp
+; BTI-NEXT: hint #36
+; BTI-NEXT: bl OUTLINED_FUNCTION_0
+
+; NOBTI-LABEL: g:
+; NOBTI: bl OUTLINED_FUNCTION_0
+; NOBTI-NEXT: bl OUTLINED_FUNCTION_1
+
+entry:
+  %call = call i32 @setjmp(i8* noundef null) #0
+  %add = add nsw i32 %b, %a
+  %mul = mul nsw i32 %add, %a
+  %add1 = add nsw i32 %d, %c
+  %div = sdiv i32 %mul, %add1
+  %add2 = add nsw i32 %div, 2
+  ret i32 %add2
+}
+
+; NOBTI-LABEL: OUTLINED_FUNCTION_0:
+; NOBTI: b setjmp
+; NOBTI-LABEL: OUTLINED_FUNCTION_1:
+; NOBTI: ret
+
+attributes #0 = { returns_twice }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"branch-target-enforcement", i32 1}
diff --git a/llvm/test/CodeGen/AArch64/setjmp-bti.ll b/llvm/test/CodeGen/AArch64/setjmp-bti.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/setjmp-bti.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel < %s | FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel < %s | FileCheck %s --check-prefix=BTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -fast-isel -mattr=+no-bti-at-return-twice < %s | \
+; RUN: FileCheck %s --check-prefix=NOBTI
+
+; C source
+; --------
+; extern int setjmp(void*);
+; extern void notsetjmp(void);
+;
+; void bbb(void) {
+;   setjmp(0);
+;   int (*fnptr)(void*) = setjmp;
+;   fnptr(0);
+;   notsetjmp();
+; }
+
+define void @bbb() {
+; BTI-LABEL: bbb:
+; BTI: bl setjmp
+; BTI-NEXT: hint #36
+; BTI: blr x{{[0-9]+}}
+; BTI-NEXT: hint #36
+; BTI: bl notsetjmp
+; BTI-NOT: hint #36
+
+; NOBTI-LABEL: bbb:
+; NOBTI: bl setjmp
+; NOBTI-NOT: hint #36
+; NOBTI: blr x{{[0-9]+}}
+; NOBTI-NOT: hint #36
+; NOBTI: bl notsetjmp
+; NOBTI-NOT: hint #36
+entry:
+  %fnptr = alloca i32 (i8*)*, align 8
+  %call = call i32 @setjmp(i8* noundef null) #0
+  store i32 (i8*)* @setjmp, i32 (i8*)** %fnptr, align 8
+  %0 = load i32 (i8*)*, i32 (i8*)** %fnptr, align 8
+  %call1 = call i32 %0(i8* noundef null) #0
+  call void @notsetjmp()
+  ret void
+}
+
+declare i32 @setjmp(i8* noundef) #0
+declare void @notsetjmp()
+
+attributes #0 = { returns_twice }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"branch-target-enforcement", i32 1}
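
For context, a rough end-to-end sketch of the behaviour this patch produces. This is illustrative only and not part of the patch: the file name and C source are invented, and the exact assembly (register allocation, scheduling, frame setup) will vary; the point is the bl setjmp / hint #36 pairing that the tests above check for.

  // demo.c (hypothetical), compiled with:
  //   clang --target=aarch64-none-linux-gnu -mbranch-protection=bti -O2 -S demo.c
  #include <setjmp.h>

  static jmp_buf buf;

  void demo(void) {
    // Some setjmp implementations perform the second return with a BR
    // rather than a RET, so under BTI the instruction after the call
    // must be a valid branch target.
    if (setjmp(buf) == 0) {
      /* first return */
    }
  }

Roughly expected output (abridged):

  demo:
          hint    #34             // bti c: landing pad at function entry
          ...
          bl      setjmp
          hint    #36             // bti j: inserted by this patch, so a
                                  // BR-based setjmp return lands safely
          ...
          ret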