Index: llvm/trunk/lib/Target/AArch64/AArch64SpeculationHardening.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64SpeculationHardening.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64SpeculationHardening.cpp @@ -102,6 +102,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" @@ -145,25 +146,31 @@ BitVector RegsAlreadyMasked; bool functionUsesHardeningRegister(MachineFunction &MF) const; - bool instrumentControlFlow(MachineBasicBlock &MBB); + bool instrumentControlFlow(MachineBasicBlock &MBB, + bool &UsesFullSpeculationBarrier); bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, AArch64CC::CondCode &CondCode) const; void insertTrackingCode(MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode, DebugLoc DL) const; - void insertSPToRegTaintPropagation(MachineBasicBlock *MBB, + void insertSPToRegTaintPropagation(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const; - void insertRegToSPTaintPropagation(MachineBasicBlock *MBB, + void insertRegToSPTaintPropagation(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned TmpReg) const; + void insertFullSpeculationBarrier(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL) const; bool slhLoads(MachineBasicBlock &MBB); bool makeGPRSpeculationSafe(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI, unsigned Reg); - bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB); + bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB, + bool UsesFullSpeculationBarrier); bool expandSpeculationSafeValue(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI); + MachineBasicBlock::iterator MBBI, + bool UsesFullSpeculationBarrier); bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL); }; @@ -206,15 +213,19 @@ return true; } +void AArch64SpeculationHardening::insertFullSpeculationBarrier( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc DL) const { + // A full control flow speculation barrier consists of (DSB SYS + ISB) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf); + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf); +} + void AArch64SpeculationHardening::insertTrackingCode( MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode, DebugLoc DL) const { if (UseControlFlowSpeculationBarrier) { - // insert full control flow speculation barrier (DSB SYS + ISB) - BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB)) - .addImm(0xf); - BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB)) - .addImm(0xf); + insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL); } else { BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr)) .addDef(MisspeculatingTaintReg) @@ -226,7 +237,7 @@ } bool AArch64SpeculationHardening::instrumentControlFlow( - MachineBasicBlock &MBB) { + MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) { LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB); bool Modified = false; @@ -262,55 +273,105 @@ } // Perform correct code generation around function calls and before returns. - { - SmallVector ReturnInstructions; - SmallVector CallInstructions; + // The below variables record the return/terminator instructions and the call + // instructions respectively; including which register is available as a + // temporary register just before the recorded instructions. + SmallVector, 4> ReturnInstructions; + SmallVector, 4> CallInstructions; + // if a temporary register is not available for at least one of the + // instructions for which we need to transfer taint to the stack pointer, we + // need to insert a full speculation barrier. + // TmpRegisterNotAvailableEverywhere tracks that condition. + bool TmpRegisterNotAvailableEverywhere = false; + + RegScavenger RS; + RS.enterBasicBlock(MBB); + + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) { + MachineInstr &MI = *I; + if (!MI.isReturn() && !MI.isCall()) + continue; - for (MachineInstr &MI : MBB) { - if (MI.isReturn()) - ReturnInstructions.push_back(&MI); - else if (MI.isCall()) - CallInstructions.push_back(&MI); + // The RegScavenger represents registers available *after* the MI + // instruction pointed to by RS.getCurrentPosition(). + // We need to have a register that is available *before* the MI is executed. + if (I != MBB.begin()) + RS.forward(std::prev(I)); + // FIXME: The below just finds *a* unused register. Maybe code could be + // optimized more if this looks for the register that isn't used for the + // longest time around this place, to enable more scheduling freedom. Not + // sure if that would actually result in a big performance difference + // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic + // already to do this - but it's unclear if that could easily be used here. + unsigned TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass); + LLVM_DEBUG(dbgs() << "RS finds " + << ((TmpReg == 0) ? "no register " : "register "); + if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " "; + dbgs() << "to be available at MI " << MI); + if (TmpReg == 0) + TmpRegisterNotAvailableEverywhere = true; + if (MI.isReturn()) + ReturnInstructions.push_back({&MI, TmpReg}); + else if (MI.isCall()) + CallInstructions.push_back({&MI, TmpReg}); + } + + if (TmpRegisterNotAvailableEverywhere) { + // When a temporary register is not available everywhere in this basic + // basic block where a propagate-taint-to-sp operation is needed, just + // emit a full speculation barrier at the start of this basic block, which + // renders the taint/speculation tracking in this basic block unnecessary. + insertFullSpeculationBarrier(MBB, MBB.begin(), + (MBB.begin())->getDebugLoc()); + UsesFullSpeculationBarrier = true; + Modified = true; + } else { + for (auto MI_Reg : ReturnInstructions) { + assert(MI_Reg.second != 0); + LLVM_DEBUG( + dbgs() + << " About to insert Reg to SP taint propagation with temp register " + << printReg(MI_Reg.second, TRI) + << " on instruction: " << *MI_Reg.first); + insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second); + Modified = true; } - Modified |= - (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0); - - for (MachineInstr *Return : ReturnInstructions) - insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17); - for (MachineInstr *Call : CallInstructions) { + for (auto MI_Reg : CallInstructions) { + assert(MI_Reg.second != 0); + LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint " + "propagation with temp register " + << printReg(MI_Reg.second, TRI) + << " around instruction: " << *MI_Reg.first); // Just after the call: - MachineBasicBlock::iterator i = Call; - i++; - insertSPToRegTaintPropagation(Call->getParent(), i); + insertSPToRegTaintPropagation( + MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first)); // Just before the call: - insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17); + insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second); + Modified = true; } } - return Modified; } void AArch64SpeculationHardening::insertSPToRegTaintPropagation( - MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const { + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { // If full control flow speculation barriers are used, emit a control flow // barrier to block potential miss-speculation in flight coming in to this // function. if (UseControlFlowSpeculationBarrier) { - // insert full control flow speculation barrier (DSB SYS + ISB) - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf); - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf); + insertFullSpeculationBarrier(MBB, MBBI, DebugLoc()); return; } // CMP SP, #0 === SUBS xzr, SP, #0 - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri)) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri)) .addDef(AArch64::XZR) .addUse(AArch64::SP) .addImm(0) .addImm(0); // no shift // CSETM x16, NE === CSINV x16, xzr, xzr, EQ - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr)) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr)) .addDef(MisspeculatingTaintReg) .addUse(AArch64::XZR) .addUse(AArch64::XZR) @@ -318,7 +379,7 @@ } void AArch64SpeculationHardening::insertRegToSPTaintPropagation( - MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned TmpReg) const { // If full control flow speculation barriers are used, there will not be // miss-speculation when returning from this function, and therefore, also @@ -327,19 +388,19 @@ return; // mov Xtmp, SP === ADD Xtmp, SP, #0 - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) .addDef(TmpReg) .addUse(AArch64::SP) .addImm(0) .addImm(0); // no shift // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0 - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs)) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs)) .addDef(TmpReg, RegState::Renamable) .addUse(TmpReg, RegState::Kill | RegState::Renamable) .addUse(MisspeculatingTaintReg, RegState::Kill) .addImm(0); // mov SP, Xtmp === ADD SP, Xtmp, #0 - BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri)) .addDef(AArch64::SP) .addUse(TmpReg, RegState::Kill) .addImm(0) @@ -483,7 +544,8 @@ /// \brief If MBBI references a pseudo instruction that should be expanded /// here, do the expansion and return true. Otherwise return false. bool AArch64SpeculationHardening::expandSpeculationSafeValue( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + bool UsesFullSpeculationBarrier) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); bool Is64Bit = true; @@ -498,7 +560,7 @@ // Just remove the SpeculationSafe pseudo's if control flow // miss-speculation isn't happening because we're already inserting barriers // to guarantee that. - if (!UseControlFlowSpeculationBarrier) { + if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) { unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcReg = MI.getOperand(1).getReg(); // Mark this register and all its aliasing registers as needing to be @@ -536,7 +598,7 @@ } bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos( - MachineBasicBlock &MBB) { + MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) { bool Modified = false; RegsNeedingCSDBBeforeUse.reset(); @@ -571,15 +633,16 @@ break; } - if (NeedToEmitBarrier) + if (NeedToEmitBarrier && !UsesFullSpeculationBarrier) Modified |= insertCSDB(MBB, MBBI, DL); - Modified |= expandSpeculationSafeValue(MBB, MBBI); + Modified |= + expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier); MBBI = NMBBI; } - if (RegsNeedingCSDBBeforeUse.any()) + if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier) Modified |= insertCSDB(MBB, MBBI, DL); return Modified; @@ -608,7 +671,7 @@ Modified |= slhLoads(MBB); } - // 2.a Add instrumentation code to function entry and exits. + // 2. Add instrumentation code to function entry and exits. LLVM_DEBUG( dbgs() << "***** AArch64SpeculationHardening - track control flow *****\n"); @@ -619,17 +682,15 @@ EntryBlocks.push_back(LPI.LandingPadBlock); for (auto Entry : EntryBlocks) insertSPToRegTaintPropagation( - Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin())); + *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin())); - // 2.b Add instrumentation code to every basic block. - for (auto &MBB : MF) - Modified |= instrumentControlFlow(MBB); - - LLVM_DEBUG(dbgs() << "***** AArch64SpeculationHardening - Lowering " - "SpeculationSafeValue Pseudos *****\n"); - // Step 3: Lower SpeculationSafeValue pseudo instructions. - for (auto &MBB : MF) - Modified |= lowerSpeculationSafeValuePseudos(MBB); + // 3. Add instrumentation code to every basic block. + for (auto &MBB : MF) { + bool UsesFullSpeculationBarrier = false; + Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier); + Modified |= + lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier); + } return Modified; } Index: llvm/trunk/test/CodeGen/AArch64/speculation-hardening-loads.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/speculation-hardening-loads.ll +++ llvm/trunk/test/CodeGen/AArch64/speculation-hardening-loads.ll @@ -11,10 +11,10 @@ ; CHECK-NEXT: and x8, x8, x16 ; CHECK-NEXT: and x1, x1, x16 ; CHECK-NEXT: csdb -; CHECK-NEXT: mov x17, sp -; CHECK-NEXT: and x17, x17, x16 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 ; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret } @@ -29,9 +29,9 @@ ; CHECK-NEXT: and x0, x0, x16 ; CHECK-NEXT: csdb ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: mov x17, sp -; CHECK-NEXT: and x17, x17, x16 -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret } @@ -51,12 +51,12 @@ ; CHECK-NEXT: and x8, x8, x16 ; csdb instruction must occur before the add instruction with w8 as operand. ; CHECK-NEXT: csdb -; CHECK-NEXT: mov x17, sp ; CHECK-NEXT: add w9, w1, w8 ; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: and x17, x17, x16 ; CHECK-NEXT: csel w0, w1, w9, eq -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret } @@ -76,12 +76,12 @@ ; CHECK-NEXT: and w8, w8, w16 ; csdb instruction must occur before the add instruction with x8 as operand. ; CHECK-NEXT: csdb -; CHECK-NEXT: mov x17, sp ; CHECK-NEXT: add x9, x1, x8 ; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: and x17, x17, x16 ; CHECK-NEXT: csel x0, x1, x9, eq -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret } @@ -112,11 +112,11 @@ ; CHECK-NEXT: and x1, x1, x16 ; CHECK-NEXT: csdb ; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: mov x17, sp -; CHECK-NEXT: and x17, x17, x16 ; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret } @@ -129,9 +129,9 @@ ; CHECK-NEXT: and x1, x1, x16 ; CHECK-NEXT: csdb ; CHECK-NEXT: ld1 { v0.d }[0], [x1] -; CHECK-NEXT: mov x17, sp -; CHECK-NEXT: and x17, x17, x16 -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret %0 = load double, double* %b, align 16 %vld1_lane = insertelement <2 x double> , double %0, i32 0 @@ -147,9 +147,9 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr w8, [sp, #12] ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: mov x17, sp -; CHECK-NEXT: and x17, x17, x16 -; CHECK-NEXT: mov sp, x17 +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp +; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 +; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret %a = alloca i32, align 4 %val = load volatile i32, i32* %a, align 4 Index: llvm/trunk/test/CodeGen/AArch64/speculation-hardening.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/speculation-hardening.ll +++ llvm/trunk/test/CodeGen/AArch64/speculation-hardening.ll @@ -1,9 +1,9 @@ -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH,GISELSLH --dump-input-on-failure +; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,GISELNOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR { ; CHECK-LABEL: f @@ -13,12 +13,12 @@ ; NOSLH-NOT: cmp sp, #0 ; NOSLH-NOT: csetm x16, ne -; SLH: mov x17, sp -; SLH: and x17, x17, x16 -; SLH: mov sp, x17 -; NOSLH-NOT: mov x17, sp -; NOSLH-NOT: and x17, x17, x16 -; NOSLH-NOT: mov sp, x17 +; SLH: mov [[TMPREG:x[0-9]+]], sp +; SLH: and [[TMPREG]], [[TMPREG]], x16 +; SLH: mov sp, [[TMPREG]] +; NOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp +; NOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16 +; NOSLH-NOT: mov sp, [[TMPREG]] %call = tail call i32 @tail_callee(i32 %i) ; SLH: cmp sp, #0 ; SLH: csetm x16, ne @@ -43,29 +43,37 @@ ; NOSLH-NOT: csel x16, x16, xzr, [[COND]] return: ; preds = %entry, %if.then %retval.0 = phi i32 [ %conv, %if.then ], [ 0, %entry ] -; SLH: mov x17, sp -; SLH: and x17, x17, x16 -; SLH: mov sp, x17 -; NOSLH-NOT: mov x17, sp -; NOSLH-NOT: and x17, x17, x16 -; NOSLH-NOT: mov sp, x17 +; SLH: mov [[TMPREG:x[0-9]+]], sp +; SLH: and [[TMPREG]], [[TMPREG]], x16 +; SLH: mov sp, [[TMPREG]] +; NOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp +; NOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16 +; NOSLH-NOT: mov sp, [[TMPREG]] ret i32 %retval.0 } ; Make sure that for a tail call, taint doesn't get put into SP twice. define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR { ; CHECK-LABEL: tail_caller: -; SLH: mov x17, sp -; SLH: and x17, x17, x16 -; SLH: mov sp, x17 -; NOSLH-NOT: mov x17, sp -; NOSLH-NOT: and x17, x17, x16 -; NOSLH-NOT: mov sp, x17 +; NOGISELSLH: mov [[TMPREG:x[0-9]+]], sp +; NOGISELSLH: and [[TMPREG]], [[TMPREG]], x16 +; NOGISELSLH: mov sp, [[TMPREG]] +; NOGISELNOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp +; NOGISELNOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16 +; NOGISELNOSLH-NOT: mov sp, [[TMPREG]] +; GISELSLH: mov [[TMPREG:x[0-9]+]], sp +; GISELSLH: and [[TMPREG]], [[TMPREG]], x16 +; GISELSLH: mov sp, [[TMPREG]] +; GISELNOSLH-NOT: mov [[TMPREG:x[0-9]+]], sp +; GISELNOSLH-NOT: and [[TMPREG]], [[TMPREG]], x16 +; GISELNOSLH-NOT: mov sp, [[TMPREG]] ; GlobalISel doesn't optimize tail calls (yet?), so only check that ; cross-call taint register setup code is missing if a tail call was ; actually produced. -; SLH: {{(bl tail_callee[[:space:]] cmp sp, #0)|(b tail_callee)}} -; SLH-NOT: cmp sp, #0 +; NOGISELSLH: b tail_callee +; GISELSLH: bl tail_callee +; GISELSLH: cmp sp, #0 +; SLH-NOT: cmp sp, #0 %call = tail call i32 @tail_callee(i32 %a) ret i32 %call } Index: llvm/trunk/test/CodeGen/AArch64/speculation-hardening.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/speculation-hardening.mir +++ llvm/trunk/test/CodeGen/AArch64/speculation-hardening.mir @@ -1,5 +1,6 @@ # RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu \ # RUN: -start-before aarch64-speculation-hardening -o - %s \ +# RUN: -debug-only=aarch64-speculation-hardening \ # RUN: | FileCheck %s --dump-input-on-failure # Check that the speculation hardening pass generates code as expected for @@ -25,6 +26,22 @@ define void @indirectbranch(i32 %a, i32 %b) speculative_load_hardening { ret void } + ; Also check that a non-default temporary register gets picked correctly to + ; transfer the SP to to and it with the taint register when the default + ; temporary isn't available. + define void @indirect_call_x17(i32 %a, i32 %b) speculative_load_hardening { + ret void + } + @g = common dso_local local_unnamed_addr global i64 (...)* null, align 8 + define void @indirect_tailcall_x17(i32 %a, i32 %b) speculative_load_hardening { + ret void + } + define void @indirect_call_lr(i32 %a, i32 %b) speculative_load_hardening { + ret void + } + define void @RS_cannot_find_available_regs() speculative_load_hardening { + ret void + } ... --- name: nobranch_fallthrough @@ -115,3 +132,72 @@ ; CHECK-NOT: csel RET undef $lr, implicit $x0 ... +--- +name: indirect_call_x17 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x17 + ; CHECK-LABEL: indirect_call_x17 + ; CHECK: mov x0, sp + ; CHECK: and x0, x0, x16 + ; CHECK: mov sp, x0 + ; CHECK: blr x17 + BLR killed renamable $x17, implicit-def dead $lr, implicit $sp + RET undef $lr, implicit undef $w0 +... +--- +name: indirect_tailcall_x17 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: indirect_tailcall_x17 + ; CHECK: mov x1, sp + ; CHECK: and x1, x1, x16 + ; CHECK: mov sp, x1 + ; CHECK: br x17 + $x8 = ADRP target-flags(aarch64-page) @g + $x17 = LDRXui killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @g + TCRETURNri killed $x17, 0, implicit $sp, implicit $x0 +... +--- +name: indirect_call_lr +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: indirect_call_lr + ; CHECK: mov x1, sp + ; CHECK-NEXT: and x1, x1, x16 + ; CHECK-NEXT: mov sp, x1 + ; CHECK-NEXT: blr x30 + liveins: $x0, $lr + BLR killed renamable $lr, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 + $w0 = nsw ADDWri killed $w0, 1, 0 + RET undef $lr, implicit $w0 +... +--- +name: RS_cannot_find_available_regs +tracksRegLiveness: true +body: | + bb.0: + ; In the rare case when no free temporary register is available for the + ; propagate taint-to-sp operation, just put in a full speculation barrier + ; (isb+dsb sy) at the start of the basic block. And don't put masks on + ; instructions for the rest of the basic block, since speculation in that + ; basic block was already done, so no need to do masking. + ; CHECK-LABEL: RS_cannot_find_available_regs + ; CHECK: dsb sy + ; CHECK-NEXT: isb + ; CHECK-NEXT: ldr x0, [x0] + ; The following 2 instructions come from propagating the taint encoded in + ; sp at function entry to x16. It turns out the taint info in x16 is not + ; used in this function, so those instructions could be optimized away. An + ; optimization for later if it turns out this situation occurs often enough. + ; CHECK-NEXT: cmp sp, #0 + ; CHECK-NEXT: csetm x16, ne + ; CHECK-NEXT: ret + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp, $lr + $x0 = LDRXui killed $x0, 0 + RET undef $lr, implicit $x0 +...