Index: llvm/trunk/lib/Target/AArch64/AArch64.h
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.h
+++ llvm/trunk/lib/Target/AArch64/AArch64.h
@@ -46,6 +46,7 @@
 FunctionPass *createAArch64A53Fix835769();
 FunctionPass *createFalkorHWPFFixPass();
 FunctionPass *createFalkorMarkStridedAccessesPass();
+FunctionPass *createAArch64BranchTargetsPass();
 
 FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
 
@@ -58,6 +59,7 @@
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
 void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
 void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
+void initializeAArch64BranchTargetsPass(PassRegistry&);
 void initializeAArch64CollectLOHPass(PassRegistry&);
 void initializeAArch64CondBrTuningPass(PassRegistry &);
 void initializeAArch64ConditionalComparesPass(PassRegistry&);
Index: llvm/trunk/lib/Target/AArch64/AArch64BranchTargets.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64BranchTargets.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -0,0 +1,145 @@
+//===-- AArch64BranchTargets.cpp -- Harden code using v8.5-A BTI extension -==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts BTI instructions at the start of every function and basic
+// block which could be indirectly called. The hardware will (when enabled)
+// trap when an indirect branch or call instruction targets an instruction
+// which is not a valid BTI instruction. This is intended to guard against
+// control-flow hijacking attacks. Note that this does not do anything for RET
+// instructions, as they can be more precisely protected by return address
+// signing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-branch-targets"
+#define AARCH64_BRANCH_TARGETS_NAME "AArch64 Branch Targets"
+
+namespace {
+/// Places BTI instructions (emitted as HINT) at the start of every block that
+/// may be the target of an indirect call or branch, in functions that carry
+/// the "branch-target-enforcement" attribute.
+class AArch64BranchTargets : public MachineFunctionPass {
+public:
+  static char ID;
+  AArch64BranchTargets() : MachineFunctionPass(ID) {}
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  StringRef getPassName() const override { return AARCH64_BRANCH_TARGETS_NAME; }
+
+private:
+  /// Insert the BTI variant selected by CouldCall / CouldJump at the start of
+  /// MBB, unless a leading PACI[AB]SP already makes it redundant.
+  void addBTI(MachineBasicBlock &MBB, bool CouldCall, bool CouldJump);
+};
+} // end anonymous namespace
+
+char AArch64BranchTargets::ID = 0;
+
+INITIALIZE_PASS(AArch64BranchTargets, "aarch64-branch-targets",
+                AARCH64_BRANCH_TARGETS_NAME, false, false)
+
+void AArch64BranchTargets::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+FunctionPass *llvm::createAArch64BranchTargetsPass() {
+  return new AArch64BranchTargets();
+}
+
+/// Mark every block of MF that may be reached by an indirect call or branch.
+/// Returns true if at least one such block was found and passed to addBTI.
+bool AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) {
+  const Function &F = MF.getFunction();
+  if (!F.hasFnAttribute("branch-target-enforcement"))
+    return false;
+
+  LLVM_DEBUG(
+      dbgs() << "********** AArch64 Branch Targets **********\n"
+             << "********** Function: " << MF.getName() << '\n');
+
+  // LLVM does not consider basic blocks which are the targets of jump tables
+  // to be address-taken (the address can't escape anywhere else), but they are
+  // used for indirect branches, so need BTI instructions.
+  SmallPtrSet<MachineBasicBlock *, 8> JumpTableTargets;
+  if (auto *JTI = MF.getJumpTableInfo())
+    for (auto &JTE : JTI->getJumpTables())
+      for (auto *MBB : JTE.MBBs)
+        JumpTableTargets.insert(MBB);
+
+  bool MadeChange = false;
+  for (MachineBasicBlock &MBB : MF) {
+    bool CouldCall = false, CouldJump = false;
+    // If the function is address-taken or externally-visible, it could be
+    // indirectly called. PLT entries and tail-calls use BR, but when they are
+    // in guarded pages should all use x16 or x17 to hold the called
+    // address, so we don't need to set CouldJump here. BR instructions in
+    // non-guarded pages (which might be non-BTI-aware code) are allowed to
+    // branch to a "BTI c" using any register.
+    if (&MBB == &*MF.begin() && (F.hasAddressTaken() || !F.hasLocalLinkage()))
+      CouldCall = true;
+
+    // If the block itself is address-taken, it could be indirectly branched
+    // to, but not called.
+    if (MBB.hasAddressTaken() || JumpTableTargets.count(&MBB))
+      CouldJump = true;
+
+    if (CouldCall || CouldJump) {
+      addBTI(MBB, CouldCall, CouldJump);
+      MadeChange = true;
+    }
+  }
+
+  return MadeChange;
+}
+
+/// Insert the BTI for MBB: CouldCall requests the "c" target kind, CouldJump
+/// the "j" kind. At least one of them must be set.
+void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall,
+                                  bool CouldJump) {
+  LLVM_DEBUG(dbgs() << "Adding BTI " << (CouldJump ? "j" : "")
+                    << (CouldCall ? "c" : "") << " to " << MBB.getName()
+                    << "\n");
+
+  const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
+      MBB.getParent()->getSubtarget().getInstrInfo());
+
+  // BTI lives in the HINT space: #32 is a BTI with no target kinds, and bits
+  // 1 and 2 of the immediate add the "c" and "j" target kinds respectively.
+  unsigned HintNum = 32;
+  if (CouldCall)
+    HintNum |= 2;
+  if (CouldJump)
+    HintNum |= 4;
+  assert(HintNum != 32 && "No target kinds!");
+
+  auto MBBI = MBB.begin();
+
+  // PACI[AB]SP always accept the branch types that "BTI c" accepts, but only
+  // accept the extra branch type covered by "j" when SCTLR_ELx.BT0/BT1 are
+  // clear, which cannot be assumed here. They therefore only make the BTI
+  // redundant when just "c" (HINT #34) is required.
+  if (MBBI != MBB.end() && HintNum == 34 &&
+      (MBBI->getOpcode() == AArch64::PACIASP ||
+       MBBI->getOpcode() == AArch64::PACIBSP))
+    return;
+
+  BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+          TII->get(AArch64::HINT))
+      .addImm(HintNum);
+}
Index: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -141,6 +141,11 @@
 static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
                                          cl::init(true), cl::Hidden);
 
+static cl::opt<bool>
+    EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,
+                        cl::desc("Enable the AArch64 branch target pass"),
+                        cl::init(true));
+
 extern "C" void LLVMInitializeAArch64Target() {
   // Register the target.
   RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -151,6 +156,7 @@
   initializeAArch64A53Fix835769Pass(*PR);
   initializeAArch64A57FPLoadBalancingPass(*PR);
   initializeAArch64AdvSIMDScalarPass(*PR);
+  initializeAArch64BranchTargetsPass(*PR);
   initializeAArch64CollectLOHPass(*PR);
   initializeAArch64ConditionalComparesPass(*PR);
   initializeAArch64ConditionOptimizerPass(*PR);
@@ -537,6 +543,9 @@
   if (BranchRelaxation)
     addPass(&BranchRelaxationPassID);
 
+  if (EnableBranchTargets)
+    addPass(createAArch64BranchTargetsPass());
+
   if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
       TM->getTargetTriple().isOSBinFormatMachO())
     addPass(createAArch64CollectLOHPass());
Index: llvm/trunk/lib/Target/AArch64/CMakeLists.txt
===================================================================
--- llvm/trunk/lib/Target/AArch64/CMakeLists.txt
+++ llvm/trunk/lib/Target/AArch64/CMakeLists.txt
@@ -22,6 +22,7 @@
   AArch64A57FPLoadBalancing.cpp
   AArch64AdvSIMDScalarPass.cpp
   AArch64AsmPrinter.cpp
+  AArch64BranchTargets.cpp
   AArch64CallLowering.cpp
   AArch64CleanupLocalDynamicTLSPass.cpp
   AArch64CollectLOH.cpp
Index: llvm/trunk/test/CodeGen/AArch64/O0-pipeline.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/O0-pipeline.ll
+++ llvm/trunk/test/CodeGen/AArch64/O0-pipeline.ll
@@ -52,6 +52,7 @@
 ; CHECK-NEXT: AArch64 pseudo instruction expansion pass
 ; CHECK-NEXT: Analyze Machine Code For Garbage Collection
 ; CHECK-NEXT: Branch relaxation pass
+; CHECK-NEXT: AArch64 Branch Targets
 ; CHECK-NEXT: Contiguously Lay Out Funclets
 ; CHECK-NEXT: StackMap Liveness Analysis
 ; CHECK-NEXT: Live DEBUG_VALUE analysis
Index: llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
+++ llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
@@ -150,6 +150,7 @@
 ; CHECK-NEXT: MachinePostDominator Tree Construction
 ; CHECK-NEXT: Branch Probability Basic Block Placement
 ; CHECK-NEXT: Branch relaxation pass
+; CHECK-NEXT: AArch64 Branch Targets
 ; CHECK-NEXT: Contiguously Lay Out Funclets
 ; CHECK-NEXT: StackMap Liveness Analysis
 ; CHECK-NEXT: Live DEBUG_VALUE analysis
Index: llvm/trunk/test/CodeGen/AArch64/branch-target-enforcment.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/branch-target-enforcment.mir
+++ llvm/trunk/test/CodeGen/AArch64/branch-target-enforcment.mir
@@ -0,0 +1,325 @@
+# RUN: llc -run-pass=aarch64-branch-targets %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-arm-none-eabi"
+
+  define hidden i32 @simple_external() "branch-target-enforcement" {
+  entry:
+    ret i32 0
+  }
+
+  define internal i32 @simple_internal() "branch-target-enforcement" {
+  entry:
+    ret i32 0
+  }
+
+  define hidden i32 @ptr_auth() "branch-target-enforcement" {
+  entry:
+    tail call void asm sideeffect "", "~{lr}"()
+    ret i32 0
+  }
+
+  define hidden i32 @ptr_auth_b() "branch-target-enforcement" {
+  entry:
+    tail call void asm sideeffect "", "~{lr}"()
+    ret i32 0
+  }
+
+  define hidden i32 @jump_table(i32 %a) "branch-target-enforcement" {
+  entry:
+    switch i32 %a, label %sw.epilog [
+      i32 1, label %sw.bb
+      i32 2, label %sw.bb1
+      i32 3, label %sw.bb2
+      i32 4, label %sw.bb3
+      i32 5, label %sw.bb4
+    ]
+
+  sw.bb: ; preds = %entry
+    tail call void asm sideeffect "", ""()
+    br label %sw.epilog
+
+  sw.bb1: ; preds = %entry
+    tail call void asm sideeffect "", ""()
+    br label %sw.epilog
+
+  sw.bb2: ; preds = %entry
+    tail call void asm sideeffect "", ""()
+    br label %sw.epilog
+
+  sw.bb3: ; preds = %entry
+    tail call void asm sideeffect "", ""()
+    br label %sw.epilog
+
+  sw.bb4: ; preds = %entry
+    tail call void asm sideeffect "", ""()
+    br label %sw.epilog
+
+  sw.epilog: ; preds = %entry, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
+    ret i32 0
+  }
+
+  @label_address.addr = internal unnamed_addr global i8* blockaddress(@label_address, %return), align 8
+
+  define hidden i32 @label_address() "branch-target-enforcement" {
+  entry:
+    %0 = load i8*, i8** @label_address.addr, align 8
+    indirectbr i8* %0, [label %return, label %lab2]
+
+  lab2: ; preds = %entry
+    br label %.split
+
+  return: ; preds = %entry
+    br label %.split
+
+  .split: ; preds = %lab2, %return
+    %merge = phi i8* [ blockaddress(@label_address, %lab2), %return ], [ blockaddress(@label_address, %return), %lab2 ]
+    %merge2 = phi i32 [ 1, %return ], [ 2, %lab2 ]
+    store i8* %merge, i8** @label_address.addr, align 8
+    ret i32 %merge2
+  }
+
+  define hidden i32 @label_address_entry() "branch-target-enforcement" {
+  entry:
+    %0 = load i8*, i8** @label_address.addr, align 8
+    indirectbr i8* %0, [label %return, label %lab2]
+
+  lab2: ; preds = %entry
+    br label %.split
+
+  return: ; preds = %entry
+    br label %.split
+
+  .split: ; preds = %lab2, %return
+    %merge = phi i8* [ blockaddress(@label_address, %lab2), %return ], [ blockaddress(@label_address, %return), %lab2 ]
+    %merge2 = phi i32 [ 1, %return ], [ 2, %lab2 ]
+    store i8* %merge, i8** @label_address.addr, align 8
+    ret i32 %merge2
+  }
+
+...
+---
+# External function, could be address-taken elsewhere so needs BTI C.
+name: simple_external
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: simple_external
+    ; CHECK: HINT 34
+    ; CHECK: RET
+    $w0 = ORRWrs $wzr, $wzr, 0
+    RET undef $lr, implicit killed $w0
+
+---
+# Internal function, not address-taken in this module, so no BTI needed.
+name: simple_internal
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: simple_internal
+    ; CHECK-NOT: HINT
+    ; CHECK: RET
+    $w0 = ORRWrs $wzr, $wzr, 0
+    RET undef $lr, implicit killed $w0
+
+---
+# Function starts with PACIASP, which implicitly acts as BTI JC, so no change
+# needed.
+name: ptr_auth
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
+      stack-id: 0, callee-saved-register: '$lr', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+body: |
+  bb.0.entry:
+    liveins: $lr
+
+    ; CHECK-LABEL: name: ptr_auth
+    ; CHECK-NOT: HINT
+    ; CHECK: frame-setup PACIASP
+    ; CHECK-NOT: HINT
+    ; CHECK: RETAA
+    frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp
+    early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store 8 into %stack.0)
+    INLINEASM &"", 1, 12, implicit-def dead early-clobber $lr
+    $w0 = ORRWrs $wzr, $wzr, 0
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load 8 from %stack.0)
+    RETAA implicit killed $w0
+
+---
+# Function starts with PACIBSP, which implicitly acts as BTI JC, so no change
+# needed.
+name: ptr_auth_b
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
+      stack-id: 0, callee-saved-register: '$lr', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+body: |
+  bb.0.entry:
+    liveins: $lr
+
+    ; CHECK-LABEL: name: ptr_auth_b
+    ; CHECK-NOT: HINT
+    ; CHECK: frame-setup PACIBSP
+    ; CHECK-NOT: HINT
+    ; CHECK: RETAB
+    frame-setup PACIBSP implicit-def $lr, implicit killed $lr, implicit $sp
+    early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store 8 into %stack.0)
+    INLINEASM &"", 1, 12, implicit-def dead early-clobber $lr
+    $w0 = ORRWrs $wzr, $wzr, 0
+    early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load 8 from %stack.0)
+    RETAB implicit killed $w0
+
+---
+# Function contains a jump table, so every target of the jump table must start
+# with BTI J.
+name: jump_table
+jumpTable:
+  kind: block-address
+  entries:
+    - id: 0
+      blocks: [ '%bb.2', '%bb.3', '%bb.4', '%bb.5', '%bb.6' ]
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: jump_table
+    ; CHECK: HINT 34
+    successors: %bb.7(0x15555555), %bb.1(0x6aaaaaab)
+    liveins: $w0
+
+    renamable $w8 = SUBWri killed renamable $w0, 1, 0, implicit-def $x8
+    dead $wzr = SUBSWri renamable $w8, 4, 0, implicit-def $nzcv
+    Bcc 8, %bb.7, implicit $nzcv
+
+  bb.1.entry:
+    ; CHECK: bb.1.entry:
+    ; CHECK-NOT: HINT
+    ; CHECK: BR killed renamable $x8
+    successors: %bb.2(0x1999999a), %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a)
+    liveins: $x8
+
+    $x9 = ADRP target-flags(aarch64-page) %jump-table.0
+    renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0, 0
+    renamable $x8 = LDRXroX killed renamable $x9, killed renamable $x8, 0, 1 :: (load 8 from jump-table)
+    BR killed renamable $x8
+
+  bb.2.sw.bb:
+    ; CHECK: bb.2.sw.bb
+    ; CHECK-NEXT: HINT 36
+    $w0 = ORRWrs $wzr, $wzr, 0
+    INLINEASM &"", 1
+    RET undef $lr, implicit killed $w0
+
+  bb.3.sw.bb1:
+    ; CHECK: bb.3.sw.bb1
+    ; CHECK-NEXT: HINT 36
+    $w0 = ORRWrs $wzr, $wzr, 0
+    INLINEASM &"", 1
+    RET undef $lr, implicit killed $w0
+
+  bb.4.sw.bb2:
+    ; CHECK: bb.4.sw.bb2
+    ; CHECK-NEXT: HINT 36
+    $w0 = ORRWrs $wzr, $wzr, 0
+    INLINEASM &"", 1
+    RET undef $lr, implicit killed $w0
+
+  bb.5.sw.bb3:
+    ; CHECK: bb.5.sw.bb3
+    ; CHECK-NEXT: HINT 36
+    $w0 = ORRWrs $wzr, $wzr, 0
+    INLINEASM &"", 1
+    RET undef $lr, implicit killed $w0
+
+  bb.6.sw.bb4:
+    ; CHECK: bb.6.sw.bb4
+    ; CHECK-NEXT: successors: %bb.7(0x80000000)
+    ; CHECK-NEXT: {{ }}
+    ; CHECK-NEXT: HINT 36
+    successors: %bb.7(0x80000000)
+
+    INLINEASM &"", 1
+
+  bb.7.sw.epilog:
+    ; CHECK: bb.7.sw.epilog:
+    ; CHECK-NOT: HINT
+    ; CHECK: RET
+    $w0 = ORRWrs $wzr, $wzr, 0
+    RET undef $lr, implicit killed $w0
+
+---
+# Function takes address of basic blocks, so they must start with BTI J.
+name: label_address
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: label_address
+    ; CHECK: bb.0.entry:
+    ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    ; CHECK-NEXT: {{ }}
+    ; CHECK-NEXT: HINT 34
+    ; CHECK: BR killed renamable $x9
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+
+    renamable $x8 = ADRP target-flags(aarch64-page) @label_address.addr
+    renamable $x9 = LDRXui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (dereferenceable load 8 from @label_address.addr)
+    BR killed renamable $x9
+
+  bb.1.return (address-taken):
+    ; CHECK: bb.1.return (address-taken):
+    ; CHECK-NEXT: HINT 36
+    liveins: $x8
+
+    $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.lab2)
+    renamable $w0 = ORRWri $wzr, 0
+    renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.lab2), 0
+    STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store 8 into @label_address.addr)
+    RET undef $lr, implicit killed $w0
+
+  bb.2.lab2 (address-taken):
+    ; CHECK: bb.2.lab2 (address-taken):
+    ; CHECK-NEXT: HINT 36
+    liveins: $x8
+
+    $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.return)
+    renamable $w0 = ORRWri $wzr, 1984
+    renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.return), 0
+    STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store 8 into @label_address.addr)
+    RET undef $lr, implicit killed $w0
+
+---
+# Function takes address of the entry block, so the entry block needs a BTI JC.
+name: label_address_entry
+body: |
+  bb.0.entry (address-taken):
+    ; CHECK-LABEL: label_address_entry
+    ; CHECK: bb.0.entry (address-taken):
+    ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    ; CHECK-NEXT: {{ }}
+    ; CHECK-NEXT: HINT 38
+    ; CHECK: BR killed renamable $x9
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+
+    renamable $x8 = ADRP target-flags(aarch64-page) @label_address.addr
+    renamable $x9 = LDRXui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (dereferenceable load 8 from @label_address.addr)
+    BR killed renamable $x9
+
+  bb.1.return (address-taken):
+    ; CHECK: bb.1.return (address-taken):
+    ; CHECK-NEXT: HINT 36
+    liveins: $x8
+
+    $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.entry)
+    renamable $w0 = ORRWri $wzr, 0
+    renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.entry), 0
+    STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store 8 into @label_address.addr)
+    RET undef $lr, implicit killed $w0
+
+  bb.2.lab2:
+    ; CHECK: bb.2.lab2:
+    ; CHECK-NOT: HINT
+    liveins: $x8
+
+    $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.return)
+    renamable $w0 = ORRWri $wzr, 1984
+    renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.return), 0
+    STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store 8 into @label_address.addr)
+    RET undef $lr, implicit killed $w0
+
+...