diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1913,6 +1913,12 @@ "Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!"); } + /// Optional target hook to create the LLVM IR attributes for the outlined + /// function. If overridden, the overriding function must call the default + /// implementation. + virtual void mergeOutliningCandidateAttributes( + Function &F, std::vector &Candidates) const; + /// Returns how or if \p MI should be outlined. virtual outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const { diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -617,20 +617,11 @@ F->addFnAttr(Attribute::OptimizeForSize); F->addFnAttr(Attribute::MinSize); - // Include target features from an arbitrary candidate for the outlined - // function. This makes sure the outlined function knows what kinds of - // instructions are going into it. This is fine, since all parent functions - // must necessarily support the instructions that are in the outlined region. Candidate &FirstCand = OF.Candidates.front(); - const Function &ParentFn = FirstCand.getMF()->getFunction(); - if (ParentFn.hasFnAttribute("target-features")) - F->addFnAttr(ParentFn.getFnAttribute("target-features")); + const TargetInstrInfo &TII = + *FirstCand.getMF()->getSubtarget().getInstrInfo(); - // Set nounwind, so we don't generate eh_frame. 
- if (llvm::all_of(OF.Candidates, [](const outliner::Candidate &C) { - return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind); - })) - F->addFnAttr(Attribute::NoUnwind); + TII.mergeOutliningCandidateAttributes(*F, OF.Candidates); BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); IRBuilder<> Builder(EntryBB); @@ -639,8 +630,6 @@ MachineModuleInfo &MMI = getAnalysis().getMMI(); MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock(); - const TargetSubtargetInfo &STI = MF.getSubtarget(); - const TargetInstrInfo &TII = *STI.getInstrInfo(); // Insert the new function into the module. MF.insert(MF.begin(), &MBB); diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1400,3 +1400,21 @@ } TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {} + +void TargetInstrInfo::mergeOutliningCandidateAttributes( + Function &F, std::vector &Candidates) const { + // Include target features from an arbitrary candidate for the outlined + // function. This makes sure the outlined function knows what kinds of + // instructions are going into it. This is fine, since all parent functions + // must necessarily support the instructions that are in the outlined region. + outliner::Candidate &FirstCand = Candidates.front(); + const Function &ParentFn = FirstCand.getMF()->getFunction(); + if (ParentFn.hasFnAttribute("target-features")) + F.addFnAttr(ParentFn.getFnAttribute("target-features")); + + // Set nounwind, so we don't generate eh_frame. 
+ if (llvm::all_of(Candidates, [](const outliner::Candidate &C) { + return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind); + })) + F.addFnAttr(Attribute::NoUnwind); +} diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h --- a/llvm/lib/Target/ARM/ARM.h +++ b/llvm/lib/Target/ARM/ARM.h @@ -44,6 +44,7 @@ FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); +FunctionPass *createARMBranchTargetsPass(); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); @@ -66,6 +67,7 @@ void initializeARMParallelDSPPass(PassRegistry &); void initializeARMLoadStoreOptPass(PassRegistry &); void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); +void initializeARMBranchTargetsPass(PassRegistry &); void initializeARMConstantIslandsPass(PassRegistry &); void initializeARMExpandPseudoPass(PassRegistry &); void initializeThumb2SizeReducePass(PassRegistry &); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -349,6 +349,8 @@ bool OutlineFromLinkOnceODRs) const override; outliner::OutlinedFunction getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const override; + void mergeOutliningCandidateAttributes( + Function &F, std::vector &Candidates) const override; outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override; bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5743,17 +5743,17 @@ }; struct OutlinerCosts { - const int CallTailCall; - const int FrameTailCall; - const int CallThunk; - const int 
FrameThunk; - const int CallNoLRSave; - const int FrameNoLRSave; - const int CallRegSave; - const int FrameRegSave; - const int CallDefault; - const int FrameDefault; - const int SaveRestoreLROnStack; + int CallTailCall; + int FrameTailCall; + int CallThunk; + int FrameThunk; + int CallNoLRSave; + int FrameNoLRSave; + int CallRegSave; + int FrameRegSave; + int CallDefault; + int FrameDefault; + int SaveRestoreLROnStack; OutlinerCosts(const ARMSubtarget &target) : CallTailCall(target.isThumb() ? 4 : 4), @@ -5874,6 +5874,24 @@ return outliner::OutlinedFunction(); } + // Partition the candidates in two sets: one with BTI enabled and one with BTI + // disabled. Remove the candidates from the smaller set. We expect the + // majority of the candidates to be in consensus with regard to branch target + // enforcement with just a few oddballs, but if they are the same number + // prefer the non-BTI ones for outlining, since they have less overhead. + auto NoBTI = + llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) { + const ARMFunctionInfo &AFI = *C.getMF()->getInfo(); + return AFI.branchTargetEnforcement(); + }); + if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) > + std::distance(NoBTI, RepeatedSequenceLocs.end())) + RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end()); + else + RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI); + if (RepeatedSequenceLocs.size() < 2) + return outliner::OutlinedFunction(); + // At this point, we have only "safe" candidates to outline. Figure out // frame + call instruction information. @@ -5887,6 +5905,16 @@ }; OutlinerCosts Costs(Subtarget); + const auto &SomeMFI = + *RepeatedSequenceLocs.front().getMF()->getInfo(); + // Adjust costs to account for the BTI instructions. 
+ if (SomeMFI.branchTargetEnforcement()) { + Costs.FrameDefault += 4; + Costs.FrameNoLRSave += 4; + Costs.FrameRegSave += 4; + Costs.FrameTailCall += 4; + Costs.FrameThunk += 4; + } unsigned FrameID = MachineOutlinerDefault; unsigned NumBytesToCreateFrame = Costs.FrameDefault; @@ -6084,7 +6112,18 @@ } return false; +} + +void ARMBaseInstrInfo::mergeOutliningCandidateAttributes( + Function &F, std::vector &Candidates) const { + outliner::Candidate &C = Candidates.front(); + // branch-target-enforcement is guaranteed to be consistent between all + // candidates, so we only need to look at one. + const Function &CFn = C.getMF()->getFunction(); + if (CFn.hasFnAttribute("branch-target-enforcement")) + F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement")); + ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates); } bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( diff --git a/llvm/lib/Target/ARM/ARMBranchTargets.cpp b/llvm/lib/Target/ARM/ARMBranchTargets.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/ARM/ARMBranchTargets.cpp @@ -0,0 +1,135 @@ +//===-- ARMBranchTargets.cpp -- Harden code using v8.1-M BTI extension -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass inserts BTI instructions at the start of every function and basic +// block which could be indirectly called. The hardware will (when enabled) +// trap when an indirect branch or call instruction targets an instruction +// which is not a valid BTI instruction. This is intended to guard against +// control-flow hijacking attacks. 
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMInstrInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "arm-branch-targets" +#define ARM_BRANCH_TARGETS_NAME "ARM Branch Targets" + +namespace { +class ARMBranchTargets : public MachineFunctionPass { +public: + static char ID; + ARMBranchTargets() : MachineFunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + StringRef getPassName() const override { return ARM_BRANCH_TARGETS_NAME; } + +private: + void addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB, bool IsFirstBB); +}; +} // end anonymous namespace + +char ARMBranchTargets::ID = 0; + +INITIALIZE_PASS(ARMBranchTargets, "arm-branch-targets", ARM_BRANCH_TARGETS_NAME, + false, false) + +void ARMBranchTargets::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +FunctionPass *llvm::createARMBranchTargetsPass() { + return new ARMBranchTargets(); +} + +bool ARMBranchTargets::runOnMachineFunction(MachineFunction &MF) { + if (!MF.getInfo()->branchTargetEnforcement()) + return false; + + LLVM_DEBUG(dbgs() << "********** ARM Branch Targets **********\n" + << "********** Function: " << MF.getName() << '\n'); + const ARMInstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + + // LLVM does not consider basic blocks which are the targets of jump tables + // to be address-taken (the address can't escape anywhere else), but they are + // used for indirect branches, so need BTI instructions. 
+ SmallPtrSet JumpTableTargets; + if (const MachineJumpTableInfo *JTI = MF.getJumpTableInfo()) + for (const MachineJumpTableEntry &JTE : JTI->getJumpTables()) + for (const MachineBasicBlock *MBB : JTE.MBBs) + JumpTableTargets.insert(MBB); + + bool MadeChange = false; + for (MachineBasicBlock &MBB : MF) { + bool NeedBTI = false; + bool IsFirstBB = &MBB == &MF.front(); + + // Every function can potentially be called indirectly (even if it has + // static linkage, due to linker-generated veneers). + if (IsFirstBB) + NeedBTI = true; + + // If the block itself is address-taken, or is an exception landing pad, it + // could be indirectly branched to. + if (MBB.hasAddressTaken() || MBB.isEHPad() || JumpTableTargets.count(&MBB)) + NeedBTI = true; + + if (NeedBTI) { + addBTI(TII, MBB, IsFirstBB); + MadeChange = true; + } + } + + return MadeChange; +} + +/// Insert a BTI/PACBTI instruction into a given basic block \c MBB. If +/// \c IsFirstBB is true (meaning that this is the first BB in a function) try +/// to find a PAC instruction and replace it with PACBTI. Otherwise just insert +/// a BTI instruction. +/// The point of insertion is in the beginning of the BB, immediately after meta +/// instructions (such labels in exception handling landing pads). +void ARMBranchTargets::addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB, + bool IsFirstBB) { + // Which instruction to insert: BTI or PACBTI + unsigned OpCode = ARM::t2BTI; + + // Skip meta instructions, including EH labels + auto MBBI = llvm::find_if_not(MBB.instrs(), [](const MachineInstr &MI) { + return MI.isMetaInstruction(); + }); + + // If this is the first BB in a function, check if it starts with a PAC + // instruction and in that case remove the PAC instruction. 
+ if (IsFirstBB) { + if (MBBI != MBB.instr_end() && MBBI->getOpcode() == ARM::t2PAC) { + LLVM_DEBUG(dbgs() << "Removing a 'PAC' instr from BB '" << MBB.getName() + << "' to replace with PACBTI\n"); + OpCode = ARM::t2PACBTI; + auto NextMBBI = std::next(MBBI); + MBBI->eraseFromParent(); + MBBI = NextMBBI; + } + } + + LLVM_DEBUG(dbgs() << "Inserting a '" + << (OpCode == ARM::t2BTI ? "BTI" : "PACBTI") + << "' instr into BB '" << MBB.getName() << "'\n"); + // Finally, insert a new instruction (either BTI or PACBTI) + BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII.get(OpCode)); } diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -184,6 +184,9 @@ /// base address. DenseMap<const MachineInstr *, unsigned> JumpTableUserIndices; + // Maps a MachineBasicBlock to the number of jump table entries. + DenseMap<const MachineBasicBlock *, int> BlockJumpTableRefCount; + /// ImmBranch - One per immediate branch, keeping the machine instruction /// pointer, conditional or unconditional, the max displacement, /// and (if isCond is true) the corresponding unconditional branch @@ -274,7 +277,10 @@ unsigned &DeadSize, bool &CanDeleteLEA, bool &BaseRegKill); bool optimizeThumb2JumpTables(); - MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, + void fixupBTI(unsigned JTI, MachineBasicBlock &OldBB, + MachineBasicBlock &NewBB); + MachineBasicBlock *adjustJTTargetBlockForward(unsigned JTI, + MachineBasicBlock *BB, MachineBasicBlock *JTBB); unsigned getUserOffset(CPUser&) const; @@ -518,6 +524,7 @@ CPEntries.clear(); JumpTableEntryIndices.clear(); JumpTableUserIndices.clear(); + BlockJumpTableRefCount.clear(); ImmBranches.clear(); PushPopMIs.clear(); T2JumpTables.clear(); @@ -720,6 +727,14 @@ return MCP->getConstants()[CPI].getAlign(); } +// Exception landing pads, blocks that have their address taken, and function +// entry blocks will always be (potential) indirect jump 
targets, regardless of +// whether or not they are referenced by jump tables. +static bool isAlwaysIndirectTarget(const MachineBasicBlock &MBB) { + return MBB.isEHPad() || MBB.hasAddressTaken() || + &MBB == &MBB.getParent()->front(); +} + /// scanFunctionJumpTables - Do a scan of the function, building up /// information about the sizes of each block and the locations of all /// the jump tables. @@ -730,6 +745,20 @@ (I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr)) T2JumpTables.push_back(&I); } + + if (!MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement()) + return; + + if (const MachineJumpTableInfo *JTI = MF->getJumpTableInfo()) + for (const MachineJumpTableEntry &JTE : JTI->getJumpTables()) + for (const MachineBasicBlock *MBB : JTE.MBBs) { + if (isAlwaysIndirectTarget(*MBB)) + // Set the reference count essentially to infinity, it will never + // reach zero and the BTI instruction will never be removed. + BlockJumpTableRefCount[MBB] = std::numeric_limits<int>::max(); + else + ++BlockJumpTableRefCount[MBB]; + } } /// initializeFunctionInfo - Do the initial scan of the function, building up @@ -2411,7 +2440,7 @@ // The destination precedes the switch. Try to move the block forward // so we have a positive offset. MachineBasicBlock *NewBB = - adjustJTTargetBlockForward(MBB, MI->getParent()); + adjustJTTargetBlockForward(JTI, MBB, MI->getParent()); if (NewBB) MJTI->ReplaceMBBInJumpTable(JTI, MBB, NewBB); MadeChange = true; @@ -2422,8 +2451,40 @@ return MadeChange; } -MachineBasicBlock *ARMConstantIslands:: -adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { +void ARMConstantIslands::fixupBTI(unsigned JTI, MachineBasicBlock &OldBB, + MachineBasicBlock &NewBB) { + assert(isThumb2 && "BTI in Thumb1?"); + + // Insert a BTI instruction into NewBB + BuildMI(NewBB, NewBB.begin(), DebugLoc(), TII->get(ARM::t2BTI)); + + // Update jump table reference counts. 
+ const MachineJumpTableInfo &MJTI = *MF->getJumpTableInfo(); + const MachineJumpTableEntry &JTE = MJTI.getJumpTables()[JTI]; + for (const MachineBasicBlock *MBB : JTE.MBBs) { + if (MBB != &OldBB) + continue; + --BlockJumpTableRefCount[MBB]; + ++BlockJumpTableRefCount[&NewBB]; + } + + // If the old basic block reference count dropped to zero, remove + // the BTI instruction at its beginning. + if (BlockJumpTableRefCount[&OldBB] > 0) + return; + + // Skip meta instructions + auto BTIPos = llvm::find_if_not(OldBB.instrs(), [](const MachineInstr &MI) { + return MI.isMetaInstruction(); + }); + assert(BTIPos->getOpcode() == ARM::t2BTI && + "BasicBlock is mentioned in a jump table but does not start with BTI"); + if (BTIPos->getOpcode() == ARM::t2BTI) + BTIPos->eraseFromParent(); +} + +MachineBasicBlock *ARMConstantIslands::adjustJTTargetBlockForward( + unsigned JTI, MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. 
This is a very simple @@ -2481,6 +2542,9 @@ NewBB->addSuccessor(BB); JTBB->replaceSuccessor(BB, NewBB); + if (MF->getInfo()->branchTargetEnforcement()) + fixupBTI(JTI, *BB, *NewBB); + ++NumJTInserted; return NewBB; } diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -92,6 +92,7 @@ initializeARMLoadStoreOptPass(Registry); initializeARMPreAllocLoadStoreOptPass(Registry); initializeARMParallelDSPPass(Registry); + initializeARMBranchTargetsPass(Registry); initializeARMConstantIslandsPass(Registry); initializeARMExecutionDomainFixPass(Registry); initializeARMExpandPseudoPass(Registry); @@ -571,6 +572,7 @@ } void ARMPassConfig::addPreEmitPass2() { + addPass(createARMBranchTargetsPass()); addPass(createARMConstantIslandPass()); addPass(createARMLowOverheadLoopsPass()); diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt --- a/llvm/lib/Target/ARM/CMakeLists.txt +++ b/llvm/lib/Target/ARM/CMakeLists.txt @@ -25,6 +25,7 @@ ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp ARMBasicBlockInfo.cpp + ARMBranchTargets.cpp ARMCallingConv.cpp ARMCallLowering.cpp ARMConstantIslandPass.cpp diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -179,6 +179,7 @@ ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Outliner ; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: ARM Branch Targets ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: ARM constant island placement and branch shortening pass ; CHECK-NEXT: MachineDominator Tree Construction diff --git a/llvm/test/CodeGen/Thumb2/bti-const-island-multiple-jump-tables.mir b/llvm/test/CodeGen/Thumb2/bti-const-island-multiple-jump-tables.mir new file mode 100644 --- /dev/null +++ 
b/llvm/test/CodeGen/Thumb2/bti-const-island-multiple-jump-tables.mir @@ -0,0 +1,311 @@ +# RUN: llc -verify-machineinstrs -run-pass arm-cp-islands %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-unknown-eabi" + + ; Tests adjustments to jump tables, made by the ARM Constant Islands pass + ; int g(int), h(int); + ; void g0(int), g1(int), g2(int); + ; void h0(int), h1(int), h2(int); + ; + ; void f(int x) { + ; for (;;) { + ; up: + ; x = g(x); + ; switch (x) { + ; case 0: + ; g0(x); + ; break; + ; case 1: + ; g1(x); + ; break; + ; case 2: + ; g2(x); + ; break; + ; case 3: + ; break; + ; case 4: + ; for (;;) { + ; x = h(x); + ; switch (x) { + ; case 0: + ; h0(x); + ; break; + ; case 1: + ; h1(x); + ; break; + ; case 2: + ; h2(x); + ; break; + ; case 3: + ; goto up; + ; case 4: + ; return; + ; } + ; } + ; } + ; } + ; } + + define hidden void @f(i32 %x) local_unnamed_addr #0 { + entry: + br label %up + + up: ; preds = %up, %sw.bb, %sw.bb1, %sw.bb2, %up.backedge.loopexit, %entry + %x.addr.1 = phi i32 [ %x, %entry ], [ %call, %up ], [ %call, %sw.bb2 ], [ %call, %sw.bb1 ], [ %call, %sw.bb ], [ %call5, %up.backedge.loopexit ] + %call = tail call i32 @g(i32 %x.addr.1) + switch i32 %call, label %up [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 4, label %for.cond4.preheader + ] + + for.cond4.preheader: ; preds = %up + br label %for.cond4 + + up.backedge.loopexit: ; preds = %for.cond4 + br label %up + + sw.bb: ; preds = %up + tail call void @g0(i32 0) + br label %up + + sw.bb1: ; preds = %up + tail call void @g1(i32 1) + br label %up + + sw.bb2: ; preds = %up + tail call void @g2(i32 2) + br label %up + + for.cond4: ; preds = %for.cond4, %sw.bb6, %sw.bb7, %sw.bb8, %for.cond4.preheader + %x.addr.2 = phi i32 [ %call, %for.cond4.preheader ], [ %call5, %sw.bb8 ], [ %call5, %sw.bb7 ], [ %call5, %sw.bb6 ], [ %call5, %for.cond4 ] + %call5 = tail call i32 @h(i32 
%x.addr.2) + switch i32 %call5, label %for.cond4 [ + i32 0, label %sw.bb6 + i32 1, label %sw.bb7 + i32 2, label %sw.bb8 + i32 3, label %up.backedge.loopexit + i32 4, label %sw.bb10 + ] + + sw.bb6: ; preds = %for.cond4 + tail call void @h0(i32 0) + br label %for.cond4 + + sw.bb7: ; preds = %for.cond4 + tail call void @h1(i32 1) + br label %for.cond4 + + sw.bb8: ; preds = %for.cond4 + tail call void @h2(i32 2) + br label %for.cond4 + + sw.bb10: ; preds = %for.cond4 + ret void + } + + declare dso_local i32 @g(i32) + + declare dso_local void @g0(i32) + + declare dso_local void @g1(i32) + + declare dso_local void @g2(i32) + + declare dso_local i32 @h(i32) + + declare dso_local void @h0(i32) + + declare dso_local void @h1(i32) + + declare dso_local void @h2(i32) + + attributes #0 = { nounwind "disable-tail-calls"="false" "frame-pointer"="none" "no-jump-tables"="false" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fullfp16,+hwdiv,+lob,+ras,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" } + + !llvm.module.flags = !{!0} + + !0 = !{i32 1, !"branch-target-enforcement", i32 1} + +... 
+--- +name: f +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +jumpTable: + kind: inline + entries: + - id: 0 + blocks: [ '%bb.3', '%bb.4', '%bb.5', '%bb.1', '%bb.7' ] + - id: 1 + blocks: [ '%bb.6', '%bb.9', '%bb.10', '%bb.1', '%bb.11' ] + +# %bb.4 and %bb.10 redirect to %bb1, the rest are just renumbered +# CHECK-LABEL: jumpTable: +# CHECK-NEXT: kind: inline +# CHECK-NEXT: entries: +# CHECK-NEXT: - id: 0 +# CHECK-NEXT: blocks: [ '%bb.6', '%bb.14', '%bb.5', '%bb.4', '%bb.7' ] +# CHECK-NEXT: - id: 1 +# CHECK-NEXT: blocks: [ '%bb.11', '%bb.12', '%bb.13', '%bb.10', '%bb.15' ] + +# %bb.1 loses the BTI +# CHECK-LABEL: bb.1.up (align 4): +# CHECK-NOT: t2BTI +# CHECK-LABEL: bb.2.up: + +# CHECK-LABEL: bb.4.up: +# CHECK: t2BTI +# CHECK: tB %bb.1 + +# CHECK-LABEL: bb.10.for.cond4: +# CHECK: t2BTI +# CHECK: tB %bb.1 
+body: | + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r4, $lr + + t2BTI + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r4, -8 + $r4 = tMOVr killed $r0, 14 /* CC::al */, $noreg + t2B %bb.1, 14 /* CC::al */, $noreg + + bb.5.sw.bb2: + successors: %bb.1(0x80000000) + liveins: $r4 + + t2BTI + $r0, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @g2, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp + + bb.1.up (align 4): + successors: %bb.1(0x20000000), %bb.2(0x60000000) + liveins: $r4 + + t2BTI + $r0 = tMOVr killed $r4, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @g, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 + $r4 = tMOVr $r0, 14 /* CC::al */, $noreg + tCMPi8 killed $r0, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.1, 8 /* CC::hi */, killed $cpsr + bb.2.up: + successors: %bb.3(0x15555555), %bb.4(0x15555555), %bb.5(0x15555555), %bb.1(0x2aaaaaab), %bb.7(0x15555555) + liveins: $r4 + + renamable $r0 = t2LEApcrelJT %jump-table.0, 14 /* CC::al */, $noreg + renamable $r0 = t2ADDrs killed renamable $r0, renamable $r4, 18, 14 /* CC::al */, $noreg, $noreg + t2BR_JT killed renamable $r0, renamable $r4, %jump-table.0 + + bb.3.sw.bb: + successors: %bb.1(0x80000000) + liveins: $r4 + + t2BTI + $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @g0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp + t2B %bb.1, 14 /* CC::al */, $noreg + + bb.6.sw.bb6: + successors: %bb.7(0x80000000) + liveins: $r4 + + t2BTI + $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @h0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, 
implicit-def $sp + + bb.7.for.cond4 (align 4): + successors: %bb.7(0x3efbefc0), %bb.8(0x41041040) + liveins: $r4 + + t2BTI + $r0 = tMOVr killed $r4, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @h, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 + $r4 = tMOVr $r0, 14 /* CC::al */, $noreg + tCMPi8 killed $r0, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.7, 8 /* CC::hi */, killed $cpsr + + bb.8.for.cond4: + successors: %bb.6(0x29555555), %bb.9(0x29555555), %bb.10(0x29555555), %bb.1(0x02000000), %bb.11(0x02000000) + liveins: $r4 + + renamable $r0 = t2LEApcrelJT %jump-table.1, 14 /* CC::al */, $noreg + renamable $r0 = t2ADDrs killed renamable $r0, renamable $r4, 18, 14 /* CC::al */, $noreg, $noreg + t2BR_JT killed renamable $r0, renamable $r4, %jump-table.1 + + bb.9.sw.bb7: + successors: %bb.7(0x80000000) + liveins: $r4 + + t2BTI + $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @h1, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp + t2B %bb.7, 14 /* CC::al */, $noreg + + bb.10.sw.bb8: + successors: %bb.7(0x80000000) + liveins: $r4 + + t2BTI + $r0, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @h2, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp + t2B %bb.7, 14 /* CC::al */, $noreg + + bb.4.sw.bb1: + successors: %bb.1(0x80000000) + liveins: $r4 + + t2BTI + $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @g1, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp + t2B %bb.1, 14 /* CC::al */, $noreg + + bb.11.sw.bb10: + t2BTI + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc + +... 
diff --git a/llvm/test/CodeGen/Thumb2/bti-const-island.mir b/llvm/test/CodeGen/Thumb2/bti-const-island.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/bti-const-island.mir @@ -0,0 +1,168 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv7m-arm-none-eabi -run-pass=arm-cp-islands %s -o - | FileCheck %s + +# This test checks that the ARM Constant Island pass correctly handles BTI +# instructions when adding new BBs to jump tables. +# +# Specifically the pass will replace bb.1.bb42.i in the jump table with a new +# BB which will contain an unconditional branch to bb.1.bb42.i. +# We expect that a BTI instruction will be added to the new BB and removed from +# bb.1.bb42.i. + +--- | + declare noalias i8* @calloc(i32, i32) + + define internal i32 @test(i32 %argc, i8** nocapture %argv) { + entry: + br label %bb42.i + + bb5.i: + %0 = or i32 %argc, 32 + br label %bb42.i + + bb35.i: + %1 = call noalias i8* @calloc(i32 20, i32 1) + unreachable + + bb37.i: + %2 = call noalias i8* @calloc(i32 14, i32 1) + unreachable + + bb39.i: + %3 = call noalias i8* @calloc(i32 17, i32 1) + unreachable + + bb42.i: + switch i32 %argc, label %bb39.i [ + i32 70, label %bb35.i + i32 77, label %bb37.i + i32 100, label %bb5.i + i32 101, label %bb42.i + i32 116, label %bb42.i + ] + } + + !llvm.module.flags = !{!0} + !0 = !{i32 1, !"branch-target-enforcement", i32 1} + +... 
+--- +name: test +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$r0' } +frameInfo: + stackSize: 8 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +stack: + - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '$lr' } + - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '$r7' } +machineFunctionInfo: {} +jumpTable: + kind: inline + entries: + - id: 0 + blocks: [ '%bb.3', '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', + '%bb.5', '%bb.4', '%bb.5', '%bb.5', '%bb.5', '%bb.5', + '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', + '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', + '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', + '%bb.1', '%bb.1', '%bb.5', '%bb.5', '%bb.5', '%bb.5', + '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.5', + '%bb.5', '%bb.5', '%bb.5', '%bb.5', '%bb.1' ] +body: | + ; CHECK-LABEL: name: test + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r0, $r7, $lr + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 70, 14 /* CC::al */, $noreg + ; CHECK: bb.1.bb42.i (align 4): + ; CHECK: successors: %bb.6(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $r0 + ; CHECK: tCMPi8 renamable $r0, 46, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.6, 8 /* CC::hi */, killed $cpsr + ; CHECK: bb.2.bb42.i: + ; CHECK: successors: %bb.5(0x20000000), %bb.6(0x20000000), %bb.7(0x20000000), %bb.4(0x20000000) + ; CHECK: liveins: $r0 + ; CHECK: t2TBB_JT $pc, $r0, %jump-table.0, 0 + ; CHECK: bb.3: + ; CHECK: successors: + ; CHECK: JUMPTABLE_TBB 0, %jump-table.0, 188 + ; CHECK: bb.4.bb42.i: + ; CHECK: successors: 
%bb.1(0x80000000) + ; CHECK: liveins: $r0 + ; CHECK: t2BTI + ; CHECK: tB %bb.1, 14 /* CC::al */, $noreg + ; CHECK: bb.5.bb35.i: + ; CHECK: successors: + ; CHECK: t2BTI + ; CHECK: $r0, dead $cpsr = tMOVi8 20, 14 /* CC::al */, $noreg + ; CHECK: $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0 + ; CHECK: bb.6.bb39.i: + ; CHECK: successors: + ; CHECK: t2BTI + ; CHECK: $r0, dead $cpsr = tMOVi8 17, 14 /* CC::al */, $noreg + ; CHECK: $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0 + ; CHECK: bb.7.bb37.i: + ; CHECK: t2BTI + ; CHECK: $r0, dead $cpsr = tMOVi8 14, 14 /* CC::al */, $noreg + ; CHECK: $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0 + bb.0.entry: + liveins: $r0, $r7, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 70, 14 /* CC::al */, $noreg + + bb.1.bb42.i (align 4): + successors: %bb.5, %bb.2 + liveins: $r0 + + t2BTI + tCMPi8 renamable $r0, 46, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.5, 8 /* CC::hi */, killed $cpsr + + bb.2.bb42.i: + successors: %bb.3, %bb.5, %bb.4, %bb.1 + liveins: $r0 + + renamable $r1 = t2LEApcrelJT %jump-table.0, 14 /* CC::al */, $noreg + renamable $r1 = t2ADDrs killed renamable $r1, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg + t2BR_JT killed renamable $r1, renamable $r0, 
%jump-table.0 + + bb.3.bb35.i: + successors: + + t2BTI + $r0, dead $cpsr = tMOVi8 20, 14 /* CC::al */, $noreg + $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0 + + bb.5.bb39.i: + successors: + + t2BTI + $r0, dead $cpsr = tMOVi8 17, 14 /* CC::al */, $noreg + $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0 + + bb.4.bb37.i: + t2BTI + $r0, dead $cpsr = tMOVi8 14, 14 /* CC::al */, $noreg + $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @calloc, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp, implicit-def dead $r0 + +... diff --git a/llvm/test/CodeGen/Thumb2/bti-entry-blocks.ll b/llvm/test/CodeGen/Thumb2/bti-entry-blocks.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/bti-entry-blocks.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=thumbv7m-arm-none-eabi | FileCheck %s + +define hidden i32 @linkage_external() local_unnamed_addr { +; CHECK-LABEL: linkage_external: +; CHECK: bti +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: bx lr +entry: + ret i32 1 +} + +define internal i32 @linkage_internal() unnamed_addr { +; CHECK-LABEL: linkage_internal: +; CHECK: bti +; CHECK: movs r0, #2 +; CHECK-NEXT: bx lr +entry: + ret i32 2 +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"branch-target-enforcement", i32 1} diff --git a/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll b/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=thumbv7m-arm-none-eabi | FileCheck %s + 
+define internal i32 @table_switch(i32 %x) { +; CHECK-LABEL: table_switch: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bti +; CHECK-NEXT: subs r1, r0, #1 +; CHECK-NEXT: cmp r1, #3 +; CHECK-NEXT: bhi .LBB0_4 +; CHECK-NEXT: @ %bb.1: @ %entry +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: tbb [pc, r1] +; CHECK-NEXT: @ %bb.2: +; CHECK-NEXT: .LJTI0_0: +; CHECK-NEXT: .byte (.LBB0_5-(.LCPI0_0+4))/2 +; CHECK-NEXT: .byte (.LBB0_3-(.LCPI0_0+4))/2 +; CHECK-NEXT: .byte (.LBB0_6-(.LCPI0_0+4))/2 +; CHECK-NEXT: .byte (.LBB0_7-(.LCPI0_0+4))/2 +; CHECK-NEXT: .p2align 1 +; CHECK-NEXT: .LBB0_3: @ %bb2 +; CHECK-NEXT: bti +; CHECK-NEXT: movs r0, #2 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB0_4: @ %sw.epilog +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: .LBB0_5: @ %return +; CHECK-NEXT: bti +; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB0_6: @ %bb3 +; CHECK-NEXT: bti +; CHECK-NEXT: movs r0, #3 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB0_7: @ %bb4 +; CHECK-NEXT: bti +; CHECK-NEXT: movs r0, #4 +; CHECK-NEXT: bx lr +entry: + switch i32 %x, label %sw.epilog [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + ] + +bb1: + br label %return +bb2: + br label %return +bb3: + br label %return +bb4: + br label %return +sw.epilog: + br label %return + +return: + %ret = phi i32 [ 0, %sw.epilog ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ] + ret i32 %ret +} + +@computed_goto_cases = private unnamed_addr constant [2 x i8*] [i8* blockaddress(@computed_goto, %return), i8* blockaddress(@computed_goto, %case_1)], align 4 + +define internal i32 @computed_goto(i32 %x) { +; CHECK-LABEL: computed_goto: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bti +; CHECK-NEXT: movw r1, :lower16:.Lcomputed_goto_cases +; CHECK-NEXT: movt r1, :upper16:.Lcomputed_goto_cases +; CHECK-NEXT: ldr.w r0, [r1, r0, lsl #2] +; CHECK-NEXT: mov pc, r0 +; CHECK-NEXT: .Ltmp3: @ Block address taken +; CHECK-NEXT: .LBB1_1: @ %return +; CHECK-NEXT: bti +; CHECK-NEXT: movs r0, #2 +; CHECK-NEXT: bx lr 
+; CHECK-NEXT: .Ltmp4: @ Block address taken +; CHECK-NEXT: .LBB1_2: @ %case_1 +; CHECK-NEXT: bti +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: bx lr +entry: + %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @computed_goto_cases, i32 0, i32 %x + %0 = load i8*, i8** %arrayidx, align 4 + indirectbr i8* %0, [label %return, label %case_1] + +case_1: + br label %return + +return: + %ret = phi i32 [ 1, %case_1 ], [ 2, %entry ] + ret i32 %ret +} + +declare void @may_throw() +declare void @consume_exception(i8*) +declare i32 @__gxx_personality_v0(...) + +define internal i32 @exception_handling(i32 %0) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: exception_handling: +; CHECK: @ %bb.0: +; CHECK-NEXT: bti +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: bl may_throw +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: bti +; CHECK-NEXT: bl consume_exception +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: pop {r7, pc} +entry: + invoke void @may_throw() + to label %return unwind label %lpad + +lpad: + %1 = landingpad { i8*, i32 } + catch i8* null + %2 = extractvalue { i8*, i32 } %1, 0 + call void @consume_exception(i8* %2) + br label %return + +return: + %retval.0 = phi i32 [ 1, %lpad ], [ 0, %entry ] + ret i32 %retval.0 +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"branch-target-enforcement", i32 1} diff --git a/llvm/test/CodeGen/Thumb2/bti-jump-table.mir b/llvm/test/CodeGen/Thumb2/bti-jump-table.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/bti-jump-table.mir @@ -0,0 +1,120 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc < %s -x mir -mtriple=thumbv7m-arm-none-eabi -run-pass=arm-branch-targets | FileCheck %s +--- | + define internal i32 @table_switch(i32 %x) { + entry: + switch i32 %x, label %sw.epilog [ + i32 
1, label %return + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + ] + + bb2: + br label %return + + bb3: + br label %return + + bb4: + br label %return + + sw.epilog: + br label %return + + return: + %ret = phi i32 [ 0, %sw.epilog ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ], [ 1, %entry ] + ret i32 %ret + } + + !llvm.module.flags = !{!0} + !0 = !{i32 1, !"branch-target-enforcement", i32 1} + +... +--- +name: table_switch +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$r0' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +jumpTable: + kind: inline + entries: + - id: 0 + blocks: [ '%bb.6', '%bb.2', '%bb.3', '%bb.4' ] +body: | + ; CHECK-LABEL: name: table_switch + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $r0 + ; CHECK: renamable $r1, dead $cpsr = tSUBi3 killed renamable $r0, 1, 14 /* CC::al */, $noreg + ; CHECK: tCMPi8 renamable $r1, 3, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: t2Bcc %bb.3, 8 /* CC::hi */, killed $cpsr + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.4(0x20000000), %bb.2(0x20000000), %bb.5(0x20000000), %bb.6(0x20000000) + ; CHECK: liveins: $r1 + ; CHECK: renamable $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $r2 = t2LEApcrelJT %jump-table.0, 14 /* CC::al */, $noreg + ; CHECK: renamable $r2 = t2ADDrs killed renamable $r2, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg + ; CHECK: t2BR_JT killed renamable $r2, killed renamable $r1, %jump-table.0 + ; CHECK: bb.2.bb2: + ; CHECK: t2BTI + ; CHECK: renamable $r0, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + ; CHECK: bb.3.sw.epilog: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg + ; CHECK: bb.4.return: + ; CHECK: liveins: $r0 + ; CHECK: t2BTI + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + ; CHECK: bb.5.bb3: + ; 
CHECK: t2BTI + ; CHECK: renamable $r0, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + ; CHECK: bb.6.bb4: + ; CHECK: t2BTI + ; CHECK: renamable $r0, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + bb.0.entry: + successors: %bb.5, %bb.1 + liveins: $r0 + + renamable $r1, dead $cpsr = tSUBi3 killed renamable $r0, 1, 14 /* CC::al */, $noreg + tCMPi8 renamable $r1, 3, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2Bcc %bb.5, 8 /* CC::hi */, killed $cpsr + + bb.1.entry: + successors: %bb.6, %bb.2, %bb.3, %bb.4 + liveins: $r1 + + renamable $r0, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + renamable $r2 = t2LEApcrelJT %jump-table.0, 14 /* CC::al */, $noreg + renamable $r2 = t2ADDrs killed renamable $r2, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg + t2BR_JT killed renamable $r2, killed renamable $r1, %jump-table.0 + + bb.2.bb2: + renamable $r0, dead $cpsr = tMOVi8 2, 14 /* CC::al */, $noreg + tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + + bb.5.sw.epilog: + renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg + + bb.6.return: + liveins: $r0 + + tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + + bb.3.bb3: + renamable $r0, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg + tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + + bb.4.bb4: + renamable $r0, dead $cpsr = tMOVi8 4, 14 /* CC::al */, $noreg + tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + +... diff --git a/llvm/test/CodeGen/Thumb2/bti-outliner-1.ll b/llvm/test/CodeGen/Thumb2/bti-outliner-1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/bti-outliner-1.ll @@ -0,0 +1,101 @@ +; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s + +; Check that each outlining candidate and the outlined function are in agreement +; with regard to whether BTI insertion is enabled or not. 
+ +; volatile int a, b, c, d, e, f; +; +; int x(int p) { +; int r = (a + b) / (c + d) * e + f; +; return r + 1; +; } +; +; __attribute__((target("branch-protection=none"))) +; int y(int p) { +; int r = (a + b) / (c + d) * e + f; +; return r + 2; +; } +; +; __attribute__((target("branch-protection=bti"))) +; int z(int p) { +; int r = (a + b) / (c + d) * e + f; +; return r + 3; +; } + +@a = hidden global i32 0, align 4 +@b = hidden global i32 0, align 4 +@c = hidden global i32 0, align 4 +@d = hidden global i32 0, align 4 +@e = hidden global i32 0, align 4 +@f = hidden global i32 0, align 4 + +define hidden i32 @x(i32 %p) local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %mul = mul nsw i32 %4, %div + %5 = load volatile i32, i32* @f, align 4 + %add2 = add nsw i32 %mul, %5 + %add3 = add nsw i32 %add2, 1 + ret i32 %add3 +} +; CHECK-LABEL: x: +; CHECK-NOT: bti +; CHECK: bl OUTLINED_FUNCTION_0 + +define hidden i32 @y(i32 %p) local_unnamed_addr #1 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %mul = mul nsw i32 %4, %div + %5 = load volatile i32, i32* @f, align 4 + %add2 = add nsw i32 %mul, %5 + %add3 = add nsw i32 %add2, 2 + ret i32 %add3 +} +; CHECK-LABEL: y: +; CHECK-NOT: bti +; CHECK: bl OUTLINED_FUNCTION_0 + +define hidden i32 @z(i32 %p) local_unnamed_addr #2 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + 
%3 = load volatile i32, i32* @d, align 4
+  %add1 = add nsw i32 %3, %2
+  %div = sdiv i32 %add, %add1
+  %4 = load volatile i32, i32* @e, align 4
+  %mul = mul nsw i32 %4, %div
+  %5 = load volatile i32, i32* @f, align 4
+  %add2 = add nsw i32 %mul, %5
+  %add3 = add nsw i32 %add2, 3
+  ret i32 %add3
+}
+; CHECK-LABEL: z:
+; CHECK: bti
+; CHECK-NOT: bl OUTLINED_FUNCTION
+
+; CHECK-LABEL: OUTLINED_FUNCTION_0:
+; CHECK-NOT: bti
+
+attributes #0 = { minsize nofree norecurse nounwind optsize }
+attributes #1 = { minsize nofree norecurse nounwind optsize "branch-target-enforcement"="false" }
+attributes #2 = { minsize nofree norecurse nounwind optsize "branch-target-enforcement"="true" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"branch-target-enforcement", i32 0}
diff --git a/llvm/test/CodeGen/Thumb2/bti-outliner-2.ll b/llvm/test/CodeGen/Thumb2/bti-outliner-2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bti-outliner-2.ll
@@ -0,0 +1,82 @@
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s
+
+; See bti-outliner-1.ll
+; Difference is the BTI placement is enabled by default for the entire module.
+ +@a = hidden global i32 0, align 4 +@b = hidden global i32 0, align 4 +@c = hidden global i32 0, align 4 +@d = hidden global i32 0, align 4 +@e = hidden global i32 0, align 4 +@f = hidden global i32 0, align 4 + +define hidden i32 @x(i32 %p) local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %mul = mul nsw i32 %4, %div + %5 = load volatile i32, i32* @f, align 4 + %add2 = add nsw i32 %mul, %5 + %add3 = add nsw i32 %add2, 1 + ret i32 %add3 +} +; CHECK-LABEL: x: +; CHECK: bti +; CHECK: bl OUTLINED_FUNCTION_0 + +define hidden i32 @y(i32 %p) local_unnamed_addr #1 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %mul = mul nsw i32 %4, %div + %5 = load volatile i32, i32* @f, align 4 + %add2 = add nsw i32 %mul, %5 + %add3 = add nsw i32 %add2, 2 + ret i32 %add3 +} +; CHECK-LABEL: y: +; CHECK-NOT: bti +; CHECK-NOT: bl OUTLINED_FUNCTION + +define hidden i32 @z(i32 %p) local_unnamed_addr #2 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %mul = mul nsw i32 %4, %div + %5 = load volatile i32, i32* @f, align 4 + %add2 = add nsw i32 %mul, %5 + %add3 = add nsw i32 %add2, 3 + ret i32 %add3 +} +; CHECK-LABEL: z: +; CHECK: bti +; CHECK: bl OUTLINED_FUNCTION_0 + +; CHECK-LABEL: 
OUTLINED_FUNCTION_0:
+; CHECK: bti
+
+attributes #0 = { minsize nofree norecurse nounwind optsize }
+attributes #1 = { minsize nofree norecurse nounwind optsize "branch-target-enforcement"="false" }
+attributes #2 = { minsize nofree norecurse nounwind optsize "branch-target-enforcement"="true" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"branch-target-enforcement", i32 1}
diff --git a/llvm/test/CodeGen/Thumb2/bti-outliner-cost-1.ll b/llvm/test/CodeGen/Thumb2/bti-outliner-cost-1.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bti-outliner-cost-1.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s
+
+; Check an edge case of the outlining costs -
+; outlining occurs in this test and does not in `bti-outliner-cost-2.ll`
+; the only difference being the branch target enforcement is enabled in the
+; latter one.
+
+; volatile int a, b, c, d, e;
+;
+; int x(int p) {
+;   int r = (a + b) / (c + d) * e;
+;   return r + 1;
+; }
+;
+; int y(int p) {
+;   int r = (a + b) / (c + d) * e;
+;   return r + 2;
+; }
+
+@a = hidden global i32 0, align 4
+@b = hidden global i32 0, align 4
+@c = hidden global i32 0, align 4
+@d = hidden global i32 0, align 4
+@e = hidden global i32 0, align 4
+
+define hidden i32 @x(i32 %p) local_unnamed_addr #0 {
+entry:
+  %0 = load volatile i32, i32* @a, align 4
+  %1 = load volatile i32, i32* @b, align 4
+  %add = add nsw i32 %1, %0
+  %2 = load volatile i32, i32* @c, align 4
+  %3 = load volatile i32, i32* @d, align 4
+  %add1 = add nsw i32 %3, %2
+  %div = sdiv i32 %add, %add1
+  %4 = load volatile i32, i32* @e, align 4
+  %mul = mul nsw i32 %4, %div
+  %add2 = add nsw i32 %mul, 1
+  ret i32 %add2
+}
+; CHECK-LABEL: x:
+; CHECK: bl OUTLINED_FUNCTION_0
+
+define hidden i32 @y(i32 %p) local_unnamed_addr #0 {
+entry:
+  %0 = load volatile i32, i32* @a, align 4
+  %1 = load volatile i32, i32* @b, align 4
+  %add = add nsw i32 %1, %0
+  %2 = load volatile i32, i32* @c, align 4
+  %3 = load volatile i32, i32* @d, align 4
+ 
%add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %mul = mul nsw i32 %4, %div + %add2 = add nsw i32 %mul, 2 + ret i32 %add2 +} +; CHECK-LABEL: y: +; CHECK: bl OUTLINED_FUNCTION_0 + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK-NOT: bti + +attributes #0 = { minsize nofree norecurse nounwind optsize } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} diff --git a/llvm/test/CodeGen/Thumb2/bti-outliner-cost-2.ll b/llvm/test/CodeGen/Thumb2/bti-outliner-cost-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/bti-outliner-cost-2.ll @@ -0,0 +1,51 @@ +; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s + +; See `bti-outliner-cost-1.ll` + +@a = hidden global i32 0, align 4 +@b = hidden global i32 0, align 4 +@c = hidden global i32 0, align 4 +@d = hidden global i32 0, align 4 +@e = hidden global i32 0, align 4 + +define hidden i32 @x(i32 %p) local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %mul = mul nsw i32 %4, %div + %add2 = add nsw i32 %mul, 1 + ret i32 %add2 +} +; CHECK-LABEL: x: +; CHECK-NOT: bl OUTLINED_FUNCTION + +define hidden i32 @y(i32 %p) local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %mul = mul nsw i32 %4, %div + %add2 = add nsw i32 %mul, 2 + ret i32 %add2 +} +; CHECK-LABEL: y: +; CHECK-NOT: bl OUTLINED_FUNCTION + +; CHECK-NOT: OUTLINED_FUNCTION + +attributes #0 = { minsize nofree 
norecurse nounwind optsize } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"branch-target-enforcement", i32 1}