Index: llvm/trunk/lib/Target/AArch64/AArch64.h
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.h
+++ llvm/trunk/lib/Target/AArch64/AArch64.h
@@ -39,6 +39,7 @@
                                  CodeGenOpt::Level OptLevel);
 FunctionPass *createAArch64StorePairSuppressPass();
 FunctionPass *createAArch64ExpandPseudoPass();
+FunctionPass *createAArch64SpeculationHardeningPass();
 FunctionPass *createAArch64LoadStoreOptimizationPass();
 FunctionPass *createAArch64SIMDInstrOptPass();
 ModulePass *createAArch64PromoteConstantPass();
@@ -68,6 +69,7 @@
 void initializeAArch64ConditionOptimizerPass(PassRegistry&);
 void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
 void initializeAArch64ExpandPseudoPass(PassRegistry&);
+void initializeAArch64SpeculationHardeningPass(PassRegistry&);
 void initializeAArch64LoadStoreOptPass(PassRegistry&);
 void initializeAArch64SIMDInstrOptPass(PassRegistry&);
 void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
Index: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2258,6 +2258,13 @@
 
 /// Try to emit a combined compare-and-branch instruction.
 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+  // will not be produced, as they are conditional branch instructions that do
+  // not set flags.
+  if (FuncInfo.MF->getFunction().hasFnAttribute(
+          Attribute::SpeculativeLoadHardening))
+    return false;
+
   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
 
   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4343,6 +4343,13 @@
   SDValue Dest = Op.getOperand(4);
   SDLoc dl(Op);
 
+  MachineFunction &MF = DAG.getMachineFunction();
+  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+  // will not be produced, as they are conditional branch instructions that do
+  // not set flags.
+  bool ProduceNonFlagSettingCondBr =
+      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+
   // Handle f128 first, since lowering it will result in comparing the return
   // value of a libcall against zero, which is just what the rest of LowerBR_CC
   // is expecting to deal with.
@@ -4385,7 +4392,7 @@
     // If the RHS of the comparison is zero, we can potentially fold this
     // to a specialized branch.
     const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
-    if (RHSC && RHSC->getZExtValue() == 0) {
+    if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
       if (CC == ISD::SETEQ) {
         // See if we can use a TBZ to fold in an AND as well.
         // TBZ has a smaller branch displacement than CBZ. If the offset is
@@ -4428,7 +4435,7 @@
       }
     }
     if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
-        LHS.getOpcode() != ISD::AND) {
+        LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
       // Don't combine AND since emitComparison converts the AND to an ANDS
       // (a.k.a. TST) and the test in the test bit and branch instruction
       // becomes redundant. This would also increase register pressure.
@@ -10807,6 +10814,13 @@
 static SDValue performBRCONDCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     SelectionDAG &DAG) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
+  // will not be produced, as they are conditional branch instructions that do
+  // not set flags.
+  if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+    return SDValue();
+
   if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
     N = NV.getNode();
   SDValue Chain = N->getOperand(0);
Index: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -964,6 +964,13 @@
                                            const MachineFunction &MF) const {
   if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
     return true;
+  switch (MI.getOpcode()) {
+  case AArch64::DSB:
+  case AArch64::ISB:
+    // DSB and ISB are also scheduling barriers.
+    return true;
+  default:;
+  }
   return isSEHInstruction(MI);
 }
Index: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -788,16 +788,36 @@
     const unsigned CondReg = I.getOperand(0).getReg();
     MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
 
-    if (selectCompareBranch(I, MF, MRI))
+    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+    // instructions will not be produced, as they are conditional branch
+    // instructions that do not set flags.
+    bool ProduceNonFlagSettingCondBr =
+        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
       return true;
 
-    auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
-                   .addUse(CondReg)
-                   .addImm(/*bit offset=*/0)
-                   .addMBB(DestMBB);
+    if (ProduceNonFlagSettingCondBr) {
+      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
+                     .addUse(CondReg)
+                     .addImm(/*bit offset=*/0)
+                     .addMBB(DestMBB);
 
-    I.eraseFromParent();
-    return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+      I.eraseFromParent();
+      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+    } else {
+      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
+                     .addDef(AArch64::WZR)
+                     .addUse(CondReg)
+                     .addImm(1);
+      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
+      auto Bcc =
+          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
+              .addImm(AArch64CC::EQ)
+              .addMBB(DestMBB);
+
+      I.eraseFromParent();
+      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
+    }
   }
 
   case TargetOpcode::G_BRINDIRECT: {
Index: llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -203,6 +203,10 @@
   if (hasBasePointer(MF))
     markSuperRegs(Reserved, AArch64::W19);
 
+  // SLH uses register W16/X16 as the taint register.
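+  // (Marking the 32-bit W16 also reserves its super-register X16 via
+  // markSuperRegs, so neither half is available to the register allocator;
+  // functions that still name X16/W16 in inline assembly are handled by the
+  // pass's barrier-based fallback. This note is editorial, not in the
+  // original commit.)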
+  if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+    markSuperRegs(Reserved, AArch64::W16);
+
   assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
 }
Index: llvm/trunk/lib/Target/AArch64/AArch64SpeculationHardening.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64SpeculationHardening.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -0,0 +1,368 @@
+//===- AArch64SpeculationHardening.cpp - Harden Against Misspeculation ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass to insert code to mitigate against side channel
+// vulnerabilities that may happen under control flow misspeculation.
+//
+// The pass implements tracking of control flow misspeculation into a "taint"
+// register. That taint register can then be used to mask off registers with
+// sensitive data when executing under misspeculation, a.k.a. "transient
+// execution".
+// This pass is aimed at mitigating SpectreV1-style vulnerabilities.
+//
+// At the moment, it implements the tracking of misspeculation of control
+// flow into a taint register, but doesn't yet implement a mechanism to then
+// use that taint register to mask off vulnerable data in registers (something
+// for a follow-on improvement). Possible strategies to mask out vulnerable
+// data that can be implemented on top of this are:
+// - speculative load hardening to automatically mask off data loaded
+//   in registers.
+// - using intrinsics to mask off data in registers as indicated by the
+//   programmer (see https://lwn.net/Articles/759423/).
+//
+// For AArch64, the following implementation choices are made below.
+// Some of these are different from the implementation choices made in
+// the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
+// the instruction set characteristics result in different trade-offs.
+// - The speculation hardening is done after register allocation. With a
+//   relative abundance of registers, one register is reserved (X16) to be
+//   the taint register. X16 is expected not to clash with other register
+//   reservation mechanisms with very high probability because:
+//   . The AArch64 ABI doesn't guarantee X16 to be retained across any call.
+//   . The only way for a programmer to request that X16 be used is through
+//     inline assembly. In the rare case a function explicitly demands to
+//     use X16/W16, this pass falls back to hardening against speculation
+//     by inserting a DSB SYS/ISB barrier pair, which will prevent control
+//     flow speculation.
+// - It is easy to insert mask operations at this late stage as we have
+//   mask operations available that don't set flags.
+// - The taint variable contains all-ones when no misspeculation is detected,
+//   and contains all-zeros when misspeculation is detected. Therefore, when
+//   masking, an AND instruction (which only changes the register to be masked,
+//   no other side effects) can easily be inserted anywhere that's needed.
+// - The tracking of misspeculation is done by using a data-flow conditional
+//   select instruction (CSEL) to evaluate the flags that were also used to
+//   make conditional branch direction decisions.
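+//   For example (an illustrative sketch, not literal pass output), for
+//     cmp  w0, w1
+//     b.lt .Ltaken
+//   the pass inserts
+//     csel x16, x16, xzr, lt
+//   at the start of the taken edge and
+//     csel x16, x16, xzr, ge
+//   at the start of the fall-through edge, so that X16 becomes all-zeros
+//   on whichever edge is only reached by misspeculating the branch.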
+//   Speculation of the CSEL instruction can be limited with a CSDB
+//   instruction - so the combination of CSEL + a later CSDB gives the
+//   guarantee that the flags as used in the CSEL aren't speculated. When
+//   the conditional branch direction gets misspeculated, the semantics of
+//   the inserted CSEL instruction are such that the taint register will
+//   contain all zero bits.
+//   One key requirement for this to work is that the conditional branch is
+//   followed by an execution of the CSEL instruction, where the CSEL
+//   instruction needs to use the same flags status as the conditional branch.
+//   This means that the conditional branches must not be implemented as one
+//   of the AArch64 conditional branches that do not use the flags as input
+//   (CB(N)Z and TB(N)Z). This is implemented by ensuring that the instruction
+//   selectors do not produce these instructions when speculation hardening
+//   is enabled. This pass will assert if it does encounter such an
+//   instruction.
+// - On function call boundaries, the misspeculation state is transferred from
+//   the taint register X16 to be encoded in the SP register as value 0.
+//
+// Future extensions/improvements could be:
+// - Implement this functionality using full speculation barriers, akin to the
+//   x86-slh-lfence option. This may be more useful for the intrinsics-based
+//   approach than for the SLH approach to masking.
+//   Note that this pass already inserts the full speculation barriers if the
+//   function for some niche reason makes use of X16/W16.
+// - No indirect branch misprediction gets protected/instrumented; but this
+//   could be done for some indirect branches, such as switch jump tables.
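+//
+// As an illustrative sketch of the overall transformation (an editorial
+// addition, assuming the taint register X16 is available and the barrier
+// fallback is not needed), a function is roughly rewritten as follows:
+//
+//   f:                      f:
+//     ...           =>        cmp   sp, #0         // recover taint from SP
+//     ret                     csetm x16, ne        // x16 = 0 if misspeculating
+//                             ...
+//                             mov   x17, sp
+//                             and   x17, x17, x16  // encode taint into SP
+//                             mov   sp, x17
+//                             ret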
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-speculation-hardening"
+
+#define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"
+
+namespace {
+
+class AArch64SpeculationHardening : public MachineFunctionPass {
+public:
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+
+  static char ID;
+
+  AArch64SpeculationHardening() : MachineFunctionPass(ID) {
+    initializeAArch64SpeculationHardeningPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  StringRef getPassName() const override {
+    return AARCH64_SPECULATION_HARDENING_NAME;
+  }
+
+private:
+  unsigned MisspeculatingTaintReg;
+  bool UseControlFlowSpeculationBarrier;
+
+  bool functionUsesHardeningRegister(MachineFunction &MF) const;
+  bool instrumentControlFlow(MachineBasicBlock &MBB);
+  bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                               MachineBasicBlock *&FBB,
+                               AArch64CC::CondCode &CondCode) const;
+  void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
+                          AArch64CC::CondCode &CondCode, DebugLoc DL) const;
+  void insertSPToRegTaintPropagation(MachineBasicBlock *MBB,
+                                     MachineBasicBlock::iterator MBBI) const;
+  void insertRegToSPTaintPropagation(MachineBasicBlock *MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     unsigned TmpReg) const;
+};
+
+} // end anonymous namespace
+
+char AArch64SpeculationHardening::ID = 0;
+
+INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
+                AARCH64_SPECULATION_HARDENING_NAME, false, false)
+
+bool AArch64SpeculationHardening::endsWithCondControlFlow(
+    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+    AArch64CC::CondCode &CondCode) const {
+  SmallVector<MachineOperand, 1> analyzeBranchCondCode;
+  if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false))
+    return false;
+
+  // Ignore if the BB ends in an unconditional branch/fall-through.
+  if (analyzeBranchCondCode.empty())
+    return false;
+
+  // If the BB ends with a single conditional branch, FBB will be set to
+  // nullptr (see API docs for TII->analyzeBranch). For the rest of the
+  // analysis we want the FBB block to be set always.
+  assert(TBB != nullptr);
+  if (FBB == nullptr)
+    FBB = MBB.getFallThrough();
+
+  // If both the true and the false condition jump to the same basic block,
+  // there is no need for any protection - whether the branch is speculated
+  // correctly or not, we end up executing the architecturally correct code.
+  if (TBB == FBB)
+    return false;
+
+  assert(MBB.succ_size() == 2);
+  // Translate analyzeBranchCondCode to CondCode.
+  assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
+  CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm());
+  return true;
+}
+
+void AArch64SpeculationHardening::insertTrackingCode(
+    MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
+    DebugLoc DL) const {
+  if (UseControlFlowSpeculationBarrier) {
+    // Insert a full control flow speculation barrier (DSB SYS + ISB).
+    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB))
+        .addImm(0xf);
+    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB))
+        .addImm(0xf);
+  } else {
+    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
+        .addDef(MisspeculatingTaintReg)
+        .addUse(MisspeculatingTaintReg)
+        .addUse(AArch64::XZR)
+        .addImm(CondCode);
+    SplitEdgeBB.addLiveIn(AArch64::NZCV);
+  }
+}
+
+bool AArch64SpeculationHardening::instrumentControlFlow(
+    MachineBasicBlock &MBB) {
+  LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
+
+  bool Modified = false;
+  MachineBasicBlock *TBB = nullptr;
+  MachineBasicBlock *FBB = nullptr;
+  AArch64CC::CondCode CondCode;
+
+  if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) {
+    LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n");
+  } else {
+    // Now insert:
+    // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and
+    // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False
+    // edge.
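+    // Architecturally, exactly one of cond/Invertcond holds after the
+    // branch, so the CSEL on the correctly-predicted edge keeps the all-ones
+    // taint value, while the CSEL on a misspeculated edge selects XZR and
+    // zeroes the taint register.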
+    AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode);
+
+    MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this);
+    MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this);
+
+    assert(SplitEdgeTBB != nullptr);
+    assert(SplitEdgeFBB != nullptr);
+
+    DebugLoc DL;
+    if (MBB.instr_end() != MBB.instr_begin())
+      DL = (--MBB.instr_end())->getDebugLoc();
+
+    insertTrackingCode(*SplitEdgeTBB, CondCode, DL);
+    insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL);
+
+    LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n");
+    LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n");
+    Modified = true;
+  }
+
+  // Perform correct code generation around function calls and before returns.
+  {
+    SmallVector<MachineInstr *, 4> ReturnInstructions;
+    SmallVector<MachineInstr *, 4> CallInstructions;
+
+    for (MachineInstr &MI : MBB) {
+      if (MI.isReturn())
+        ReturnInstructions.push_back(&MI);
+      else if (MI.isCall())
+        CallInstructions.push_back(&MI);
+    }
+
+    Modified |=
+        (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0);
+
+    for (MachineInstr *Return : ReturnInstructions)
+      insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17);
+    for (MachineInstr *Call : CallInstructions) {
+      // Just after the call:
+      MachineBasicBlock::iterator i = Call;
+      i++;
+      insertSPToRegTaintPropagation(Call->getParent(), i);
+      // Just before the call:
+      insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17);
+    }
+  }
+
+  return Modified;
+}
+
+void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
+    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const {
+  // If full control flow speculation barriers are used, emit a control flow
+  // barrier to block potential misspeculation in flight coming in to this
+  // function.
+  if (UseControlFlowSpeculationBarrier) {
+    // Insert a full control flow speculation barrier (DSB SYS + ISB).
+    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf);
+    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf);
+    return;
+  }
+
+  // CMP SP, #0 === SUBS xzr, SP, #0
+  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
+      .addDef(AArch64::XZR)
+      .addUse(AArch64::SP)
+      .addImm(0)
+      .addImm(0); // no shift
+  // CSETM x16, NE === CSINV x16, xzr, xzr, EQ
+  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
+      .addDef(MisspeculatingTaintReg)
+      .addUse(AArch64::XZR)
+      .addUse(AArch64::XZR)
+      .addImm(AArch64CC::EQ);
+}
+
+void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
+    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
+    unsigned TmpReg) const {
+  // If full control flow speculation barriers are used, there will not be
+  // misspeculation when returning from this function, and therefore, also
+  // no need to encode potential misspeculation into the stack pointer.
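+  // The taint is transferred through SP (rather than X16) across calls and
+  // returns because the ABI does not guarantee that X16 survives a call;
+  // insertSPToRegTaintPropagation turns a zero SP back into an all-zeros
+  // taint register at function entry and just after calls.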
+  if (UseControlFlowSpeculationBarrier)
+    return;
+
+  // mov Xtmp, SP === ADD Xtmp, SP, #0
+  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+      .addDef(TmpReg)
+      .addUse(AArch64::SP)
+      .addImm(0)
+      .addImm(0); // no shift
+  // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, lsl #0
+  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
+      .addDef(TmpReg, RegState::Renamable)
+      .addUse(TmpReg, RegState::Kill | RegState::Renamable)
+      .addUse(MisspeculatingTaintReg, RegState::Kill)
+      .addImm(0);
+  // mov SP, Xtmp === ADD SP, Xtmp, #0
+  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+      .addDef(AArch64::SP)
+      .addUse(TmpReg, RegState::Kill)
+      .addImm(0)
+      .addImm(0); // no shift
+}
+
+bool AArch64SpeculationHardening::functionUsesHardeningRegister(
+    MachineFunction &MF) const {
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      // Treat function calls specially, as the hardening register does not
+      // need to remain live across function calls.
+      if (MI.isCall())
+        continue;
+      if (MI.readsRegister(MisspeculatingTaintReg, TRI) ||
+          MI.modifiesRegister(MisspeculatingTaintReg, TRI))
+        return true;
+    }
+  }
+  return false;
+}
+
+bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
+  if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
+    return false;
+
+  MisspeculatingTaintReg = AArch64::X16;
+  TII = MF.getSubtarget().getInstrInfo();
+  TRI = MF.getSubtarget().getRegisterInfo();
+  bool Modified = false;
+
+  UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF);
+
+  // Instrument control flow speculation tracking, if requested.
+  LLVM_DEBUG(
+      dbgs()
+      << "***** AArch64SpeculationHardening - track control flow *****\n");
+
+  // 1. Add instrumentation code to function entry and exits.
+  SmallVector<MachineBasicBlock *, 2> EntryBlocks;
+  EntryBlocks.push_back(&MF.front());
+  for (const LandingPadInfo &LPI : MF.getLandingPads())
+    EntryBlocks.push_back(LPI.LandingPadBlock);
+  for (auto Entry : EntryBlocks)
+    insertSPToRegTaintPropagation(
+        Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
+
+  // 2. Add instrumentation code to every basic block.
+  for (auto &MBB : MF)
+    Modified |= instrumentControlFlow(MBB);
+
+  return Modified;
+}
+
+/// \brief Returns an instance of the AArch64 speculation hardening pass.
+FunctionPass *llvm::createAArch64SpeculationHardeningPass() {
+  return new AArch64SpeculationHardening();
+}
Index: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -177,6 +177,7 @@
   initializeFalkorHWPFFixPass(*PR);
   initializeFalkorMarkStridedAccessesLegacyPass(*PR);
   initializeLDTLSCleanupPass(*PR);
+  initializeAArch64SpeculationHardeningPass(*PR);
 }
 
 //===----------------------------------------------------------------------===//
@@ -550,6 +551,16 @@
   if (TM->getOptLevel() != CodeGenOpt::None) {
     if (EnableLoadStoreOpt)
       addPass(createAArch64LoadStoreOptimizationPass());
+  }
+
+  // The AArch64SpeculationHardeningPass destroys the dominator tree and
+  // natural loop info, which are needed for the FalkorHWPFFixPass and also
+  // later on. Therefore, run the AArch64SpeculationHardeningPass before the
+  // FalkorHWPFFixPass to avoid recomputing the dominator tree and natural
+  // loop info.
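+  // Note that the hardening pass is added unconditionally, even at -O0 (see
+  // the O0-pipeline.ll test below): the pass itself returns early for
+  // functions that do not carry the speculative_load_hardening attribute.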
+  addPass(createAArch64SpeculationHardeningPass());
+
+  if (TM->getOptLevel() != CodeGenOpt::None) {
     if (EnableFalkorHWPFFix)
       addPass(createFalkorHWPFFixPass());
   }
Index: llvm/trunk/lib/Target/AArch64/CMakeLists.txt
===================================================================
--- llvm/trunk/lib/Target/AArch64/CMakeLists.txt
+++ llvm/trunk/lib/Target/AArch64/CMakeLists.txt
@@ -52,6 +52,7 @@
   AArch64RegisterBankInfo.cpp
   AArch64RegisterInfo.cpp
   AArch64SelectionDAGInfo.cpp
+  AArch64SpeculationHardening.cpp
  AArch64StorePairSuppress.cpp
   AArch64Subtarget.cpp
   AArch64TargetMachine.cpp
Index: llvm/trunk/test/CodeGen/AArch64/O0-pipeline.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/O0-pipeline.ll
+++ llvm/trunk/test/CodeGen/AArch64/O0-pipeline.ll
@@ -50,6 +50,7 @@
 ; CHECK-NEXT:       Prologue/Epilogue Insertion & Frame Finalization
 ; CHECK-NEXT:       Post-RA pseudo instruction expansion pass
 ; CHECK-NEXT:       AArch64 pseudo instruction expansion pass
+; CHECK-NEXT:       AArch64 speculation hardening pass
 ; CHECK-NEXT:       Analyze Machine Code For Garbage Collection
 ; CHECK-NEXT:       Branch relaxation pass
 ; CHECK-NEXT:       AArch64 Branch Targets
Index: llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
+++ llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
@@ -146,6 +146,7 @@
 ; CHECK-NEXT:       Post-RA pseudo instruction expansion pass
 ; CHECK-NEXT:       AArch64 pseudo instruction expansion pass
 ; CHECK-NEXT:       AArch64 load / store optimization pass
+; CHECK-NEXT:       AArch64 speculation hardening pass
 ; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       Machine Natural Loop Construction
 ; CHECK-NEXT:       Falkor HW Prefetch Fix Late Phase
Index: llvm/trunk/test/CodeGen/AArch64/speculation-hardening-dagisel.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/speculation-hardening-dagisel.ll
+++ llvm/trunk/test/CodeGen/AArch64/speculation-hardening-dagisel.ll
@@ -0,0 +1,71 @@
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+
+declare i64 @g(i64, i64) local_unnamed_addr
+define i64 @f_using_reserved_reg_x16(i64 %a, i64 %b) local_unnamed_addr SLHATTR {
+; CHECK-LABEL: f_using_reserved_reg_x16
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+entry:
+  %cmp = icmp ugt i64 %a, %b
+  br i1 %cmp, label %if.then, label %cleanup
+
+; CHECK: b.ls
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+if.then:
+  %0 = tail call i64 asm "autia1716", "={x17},{x16},0"(i64 %b, i64 %a)
+; CHECK: bl g
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+; CHECK: ret
+  %call = tail call i64 @g(i64 %a, i64 %b) #3
+  %add = add i64 %call, %0
+  br label %cleanup
+
+cleanup:
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+; SLH: ret
+  %retval.0 = phi i64 [ %add, %if.then ], [ %b, %entry ]
+  ret i64 %retval.0
+}
+
+define i32 @f_clobbered_reg_w16(i32 %a, i32 %b) local_unnamed_addr SLHATTR {
+; CHECK-LABEL: f_clobbered_reg_w16
+entry:
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+  %cmp = icmp sgt i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.end
+; CHECK: b.le
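+; This function clobbers w16 via the inline assembly below, so, as in
+; f_using_reserved_reg_x16 above, the pass cannot use X16 as the taint
+; register and falls back to the dsb sy + isb barrier-based hardening that
+; the SLH lines in this test check for.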
+if.then:
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+; CHECK: mov w16, w0
+  tail call void asm sideeffect "mov w16, ${0:w}", "r,~{w16}"(i32 %a)
+  br label %if.end
+; SLH: ret
+
+if.end:
+  %add = add nsw i32 %b, %a
+  ret i32 %add
+; SLH: dsb sy
+; SLH: isb
+; NOSLH-NOT: dsb sy
+; NOSLH-NOT: isb
+; SLH: ret
+}
Index: llvm/trunk/test/CodeGen/AArch64/speculation-hardening.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/speculation-hardening.ll
+++ llvm/trunk/test/CodeGen/AArch64/speculation-hardening.ll
@@ -0,0 +1,156 @@
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+
+define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR {
+; CHECK-LABEL: f
+entry:
+; SLH: cmp sp, #0
+; SLH: csetm x16, ne
+; NOSLH-NOT: cmp sp, #0
+; NOSLH-NOT: csetm x16, ne
+
+; SLH: mov x17, sp
+; SLH: and x17, x17, x16
+; SLH: mov sp, x17
+; NOSLH-NOT: mov x17, sp
+; NOSLH-NOT: and x17, x17, x16
+; NOSLH-NOT: mov sp, x17
+  %call = tail call i32 @tail_callee(i32 %i)
+; SLH: cmp sp, #0
+; SLH: csetm x16, ne
+; NOSLH-NOT: cmp sp, #0
+; NOSLH-NOT: csetm x16, ne
+  %cmp = icmp slt i32 %call, %N
+  br i1 %cmp, label %if.then, label %return
+; GlobalISel sometimes lowers the branch to a b.ne instead of the expected b.ge.
+; CHECK: b.[[COND:(ge)|(lt)|(ne)]]
+
+if.then:                                          ; preds = %entry
+; NOSLH-NOT: csel x16, x16, xzr, {{(lt)|(ge)|(eq)}}
+; SLH-DAG: csel x16, x16, xzr, {{(lt)|(ge)|(eq)}}
+  %idxprom = sext i32 %i to i64
+  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %idxprom
+  %0 = load i8, i8* %arrayidx, align 1
+; CHECK-DAG: ldrb [[LOADED:w[0-9]+]],
+  %conv = zext i8 %0 to i32
+  br label %return
+
+; SLH-DAG: csel x16, x16, xzr, [[COND]]
+; NOSLH-NOT: csel x16, x16, xzr, [[COND]]
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ %conv, %if.then ], [ 0, %entry ]
+; SLH: mov x17, sp
+; SLH: and x17, x17, x16
+; SLH: mov sp, x17
+; NOSLH-NOT: mov x17, sp
+; NOSLH-NOT: and x17, x17, x16
+; NOSLH-NOT: mov sp, x17
+  ret i32 %retval.0
+}
+
+; Make sure that for a tail call, taint doesn't get put into SP twice.
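+; A tail call is both a call and a return; the else-if in
+; instrumentControlFlow makes it take only the return path, so a single
+; taint-to-SP encoding is emitted before the branch and no SP-to-taint
+; sequence after it.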
+define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR {
+; CHECK-LABEL: tail_caller:
+; SLH: mov x17, sp
+; SLH: and x17, x17, x16
+; SLH: mov sp, x17
+; NOSLH-NOT: mov x17, sp
+; NOSLH-NOT: and x17, x17, x16
+; NOSLH-NOT: mov sp, x17
+; GlobalISel doesn't optimize tail calls (yet?), so only check that
+; cross-call taint register setup code is missing if a tail call was
+; actually produced.
+; SLH: {{(bl tail_callee[[:space:]] cmp sp, #0)|(b tail_callee)}}
+; SLH-NOT: cmp sp, #0
+  %call = tail call i32 @tail_callee(i32 %a)
+  ret i32 %call
+}
+
+declare i32 @tail_callee(i32) local_unnamed_addr
+
+; Verify that no cb(n)z/tb(n)z instructions are produced when implementing
+; SLH.
+define i32 @compare_branch_zero(i32, i32) SLHATTR {
+; CHECK-LABEL: compare_branch_zero
+  %3 = icmp eq i32 %0, 0
+  br i1 %3, label %then, label %else
+; SLH-NOT: cb{{n?}}z
+; NOSLH: cb{{n?}}z
+then:
+  %4 = sdiv i32 5, %1
+  ret i32 %4
+else:
+  %5 = sdiv i32 %1, %0
+  ret i32 %5
+}
+
+define i32 @test_branch_zero(i32, i32) SLHATTR {
+; CHECK-LABEL: test_branch_zero
+  %3 = and i32 %0, 16
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %then, label %else
+; SLH-NOT: tb{{n?}}z
+; NOSLH: tb{{n?}}z
+then:
+  %5 = sdiv i32 5, %1
+  ret i32 %5
+else:
+  %6 = sdiv i32 %1, %0
+  ret i32 %6
+}
+
+define i32 @landingpad(i32 %l0, i32 %l1) SLHATTR personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: landingpad
+entry:
+; SLH: cmp sp, #0
+; SLH: csetm x16, ne
+; NOSLH-NOT: cmp sp, #0
+; NOSLH-NOT: csetm x16, ne
+; CHECK: bl _Z10throwing_fv
+  invoke void @_Z10throwing_fv()
+          to label %exit unwind label %lpad
+; SLH: cmp sp, #0
+; SLH: csetm x16, ne
+
+lpad:
+  %l4 = landingpad { i8*, i32 }
+          catch i8* null
+; SLH: cmp sp, #0
+; SLH: csetm x16, ne
+; NOSLH-NOT: cmp sp, #0
+; NOSLH-NOT: csetm x16, ne
+  %l5 = extractvalue { i8*, i32 } %l4, 0
+  %l6 = tail call i8* @__cxa_begin_catch(i8* %l5)
+  %l7 = icmp sgt i32 %l0, %l1
+  br i1 %l7, label %then, label %else
+; GlobalISel sometimes lowers the branch to a b.ne instead of the expected b.le.
+; CHECK: b.[[COND:(le)|(gt)|(ne)]]
+
+then:
+; SLH-DAG: csel x16, x16, xzr, [[COND]]
+  %l9 = sdiv i32 %l0, %l1
+  br label %postif
+
+else:
+; SLH-DAG: csel x16, x16, xzr, {{(gt)|(le)|(eq)}}
+  %l11 = sdiv i32 %l1, %l0
+  br label %postif
+
+postif:
+  %l13 = phi i32 [ %l9, %then ], [ %l11, %else ]
+  tail call void @__cxa_end_catch()
+  br label %exit
+
+exit:
+  %l15 = phi i32 [ %l13, %postif ], [ 0, %entry ]
+  ret i32 %l15
+}
+
+declare i32 @__gxx_personality_v0(...)
+declare void @_Z10throwing_fv() local_unnamed_addr
+declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr
+declare void @__cxa_end_catch() local_unnamed_addr
Index: llvm/trunk/test/CodeGen/AArch64/speculation-hardening.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/speculation-hardening.mir
+++ llvm/trunk/test/CodeGen/AArch64/speculation-hardening.mir
@@ -0,0 +1,117 @@
+# RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu \
+# RUN:     -start-before aarch64-speculation-hardening -o - %s \
+# RUN:     | FileCheck %s --dump-input-on-failure
+
+# Check that the speculation hardening pass generates code as expected for
+# basic blocks ending with a variety of branch patterns:
+# - (1) no branches (fallthrough)
+# - (2) one unconditional branch
+# - (3) one conditional branch + fall-through
+# - (4) one conditional branch + one unconditional branch
+# - other direct branches don't seem to be generated by the AArch64 codegen
+--- |
+  define void @nobranch_fallthrough(i32 %a, i32 %b) speculative_load_hardening {
+    ret void
+  }
+  define void @uncondbranch(i32 %a, i32 %b) speculative_load_hardening {
+    ret void
+  }
+  define void @condbranch_fallthrough(i32 %a, i32 %b) speculative_load_hardening {
+    ret void
+  }
+  define void @condbranch_uncondbranch(i32 %a, i32 %b) speculative_load_hardening {
+    ret void
+  }
+  define void @indirectbranch(i32 %a, i32 %b) speculative_load_hardening {
+    ret void
+  }
+...
+---
+name:            nobranch_fallthrough
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: nobranch_fallthrough
+  bb.0:
+    successors: %bb.1
+    liveins: $w0, $w1
+    ; CHECK-NOT: csel
+  bb.1:
+    liveins: $w0
+    RET undef $lr, implicit $w0
+...
+---
+name:            uncondbranch
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: uncondbranch
+  bb.0:
+    successors: %bb.1
+    liveins: $w0, $w1
+    B %bb.1
+    ; CHECK-NOT: csel
+  bb.1:
+    liveins: $w0
+    RET undef $lr, implicit $w0
+...
+---
+name:            condbranch_fallthrough
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: condbranch_fallthrough
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $w0, $w1
+    $wzr = SUBSWrs renamable $w0, renamable $w1, 0, implicit-def $nzcv
+    Bcc 11, %bb.2, implicit $nzcv
+    ; CHECK: b.lt [[BB_LT_T:\.LBB[0-9_]+]]
+
+  bb.1:
+    liveins: $nzcv, $w0
+    ; CHECK: csel x16, x16, xzr, ge
+    RET undef $lr, implicit $w0
+  bb.2:
+    liveins: $nzcv, $w0
+    ; CHECK: csel x16, x16, xzr, lt
+    RET undef $lr, implicit $w0
+...
+---
+name:            condbranch_uncondbranch
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: condbranch_uncondbranch
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $w0, $w1
+    $wzr = SUBSWrs renamable $w0, renamable $w1, 0, implicit-def $nzcv
+    Bcc 11, %bb.2, implicit $nzcv
+    B %bb.1, implicit $nzcv
+    ; CHECK: b.lt [[BB_LT_T:\.LBB[0-9_]+]]
+
+  bb.1:
+    liveins: $nzcv, $w0
+    ; CHECK: csel x16, x16, xzr, ge
+    RET undef $lr, implicit $w0
+  bb.2:
+    liveins: $nzcv, $w0
+    ; CHECK: csel x16, x16, xzr, lt
+    RET undef $lr, implicit $w0
+...
+---
+name:            indirectbranch
+tracksRegLiveness: true
+body:             |
+  ; Check that no instrumentation is done on indirect branches (for now).
+  ; CHECK-LABEL: indirectbranch
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $x0
+    BR $x0
+  bb.1:
+    liveins: $x0
+    ; CHECK-NOT: csel
+    RET undef $lr, implicit $x0
+  bb.2:
+    liveins: $x0
+    ; CHECK-NOT: csel
+    RET undef $lr, implicit $x0
+...
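+# Editorial note: the csel inserted on each split edge reads the flags set by
+# the compare in the predecessor block, which is why insertTrackingCode also
+# adds $nzcv to the live-ins of the split-edge blocks.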