Diff 157184

llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp

Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines
#define DEBUG_TYPE PASS_KEY		#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");		STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");		STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
STATISTIC(NumAddrRegsHardened,		STATISTIC(NumAddrRegsHardened,
"Number of address mode used registers hardaned");		"Number of address mode used registers hardaned");
STATISTIC(NumPostLoadRegsHardened,		STATISTIC(NumPostLoadRegsHardened,
"Number of post-load register values hardened");		"Number of post-load register values hardened");
		// Counter bumped by hardenIndirectCallOrJumpInstr each time it rewrites the
		// target register of a call or jump to a hardened register.
		STATISTIC(NumCallsOrJumpsHardened,
		"Number of calls or jumps requiring extra hardening");
STATISTIC(NumInstsInserted, "Number of instructions inserted");		STATISTIC(NumInstsInserted, "Number of instructions inserted");
STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");		STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");

static cl::opt<bool> HardenEdgesWithLFENCE(		static cl::opt<bool> HardenEdgesWithLFENCE(
PASS_KEY "-lfence",		PASS_KEY "-lfence",
cl::desc(		cl::desc(
"Use LFENCE along each conditional edge to harden against speculative "		"Use LFENCE along each conditional edge to harden against speculative "
"loads rather than conditional movs and poisoned pointers."),		"loads rather than conditional movs and poisoned pointers."),
Show All 19 Lines	static cl::opt<bool> HardenInterprocedurally(
cl::init(true), cl::Hidden);		cl::init(true), cl::Hidden);

static cl::opt<bool>		static cl::opt<bool>
HardenLoads(PASS_KEY "-loads",		HardenLoads(PASS_KEY "-loads",
cl::desc("Sanitize loads from memory. When disable, no "		cl::desc("Sanitize loads from memory. When disable, no "
"significant security is provided."),		"significant security is provided."),
cl::init(true), cl::Hidden);		cl::init(true), cl::Hidden);

		// Command-line switch (PASS_KEY "-indirect") gating the indirect call/jump
		// hardening: when set, loading calls and branches are unfolded and their
		// register targets are hardened. Defaults to true and is marked cl::Hidden.
		static cl::opt<bool> HardenIndirectCallsAndJumps(
		PASS_KEY "-indirect",
		cl::desc("Harden indirect calls and jumps against using speculatively "
		"stored attacker controlled addresses. This is designed to "
		"mitigate Spectre v1.2 style attacks."),
		cl::init(true), cl::Hidden);

namespace llvm {		namespace llvm {

void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);		void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);

} // end namespace llvm		} // end namespace llvm

namespace {		namespace {

▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines	private:

void hardenEdgesWithLFENCE(MachineFunction &MF);		void hardenEdgesWithLFENCE(MachineFunction &MF);

SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);		SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);

SmallVector<MachineInstr *, 16>		SmallVector<MachineInstr *, 16>
tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);		tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);

		void unfoldCallAndJumpLoads(MachineFunction &MF);

void hardenAllLoads(MachineFunction &MF);		void hardenAllLoads(MachineFunction &MF);

unsigned saveEFLAGS(MachineBasicBlock &MBB,		unsigned saveEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc Loc);		MachineBasicBlock::iterator InsertPt, DebugLoc Loc);
void restoreEFLAGS(MachineBasicBlock &MBB,		void restoreEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc Loc,		MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
unsigned OFReg);		unsigned OFReg);

Show All 12 Lines	private:
sinkPostLoadHardenedInst(MachineInstr &MI,		sinkPostLoadHardenedInst(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);		SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
bool canHardenRegister(unsigned Reg);		bool canHardenRegister(unsigned Reg);
unsigned hardenValueInRegister(unsigned Reg, MachineBasicBlock &MBB,		unsigned hardenValueInRegister(unsigned Reg, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt,		MachineBasicBlock::iterator InsertPt,
DebugLoc Loc);		DebugLoc Loc);
unsigned hardenPostLoad(MachineInstr &MI);		unsigned hardenPostLoad(MachineInstr &MI);
void hardenReturnInstr(MachineInstr &MI);		void hardenReturnInstr(MachineInstr &MI);
		void hardenIndirectCallOrJumpInstr(
		MachineInstr &MI,
		SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
};		};

} // end anonymous namespace		} // end anonymous namespace

char X86SpeculativeLoadHardeningPass::ID = 0;		char X86SpeculativeLoadHardeningPass::ID = 0;

void X86SpeculativeLoadHardeningPass::getAnalysisUsage(		void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
AnalysisUsage &AU) const {		AnalysisUsage &AU) const {
▲ Show 20 Lines • Show All 295 Lines • ▼ Show 20 Lines	for (MachineBasicBlock &MBB : MF) {
if (!MBB.isEHPad())		if (!MBB.isEHPad())
continue;		continue;
PS->SSA.AddAvailableValue(		PS->SSA.AddAvailableValue(
&MBB,		&MBB,
extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));		extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
}		}
}		}

		// If we are going to harden calls and jumps we need to unfold their memory
		// operands.
		if (HardenIndirectCallsAndJumps)
		unfoldCallAndJumpLoads(MF);

// Now harden all of the loads in the function using the predicate state.		// Now harden all of the loads in the function using the predicate state.
hardenAllLoads(MF);		hardenAllLoads(MF);

// Now rewrite all the uses of the pred state using the SSA updater so that		// Now rewrite all the uses of the pred state using the SSA updater so that
// we track updates through the CFG.		// we track updates through the CFG.
for (MachineInstr *CMovI : CMovs)		for (MachineInstr *CMovI : CMovs)
for (MachineOperand &Op : CMovI->operands()) {		for (MachineOperand &Op : CMovI->operands()) {
if (!Op.isReg() \|\| Op.getReg() != PS->InitialReg)		if (!Op.isReg() \|\| Op.getReg() != PS->InitialReg)
▲ Show 20 Lines • Show All 294 Lines • ▼ Show 20 Lines	for (const BlockCondInfo &Info : Infos) {
// Build a checking version of the successor.		// Build a checking version of the successor.
BuildCheckingBlockForSuccAndConds(MBB, UncondSucc, /SuccCount*/ 1,		BuildCheckingBlockForSuccAndConds(MBB, UncondSucc, /SuccCount*/ 1,
UncondBr, UncondBr, UncondCodeSeq);		UncondBr, UncondBr, UncondCodeSeq);
}		}

return CMovs;		return CMovs;
}		}

		/// Compute the register class for the unfolded load.
		///
		/// Asks \p TII which opcode \p Opcode turns into once its load is unfolded
		/// (loads only, stores are left folded) and returns the register class of the
		/// operand index reported by that query.
		///
		/// \param MF     Function the register class is computed for.
		/// \param TII    X86 instruction info used for the unfold lookup.
		/// \param Opcode Opcode of the load-folding instruction.
		/// \returns the register class for the unfolded load's result, or nullptr
		///          when \p Opcode has no load-unfolded form.
		///
		/// FIXME: This should probably live in X86InstrInfo, potentially by adding
		/// a way to unfold into a newly created vreg rather than requiring a register
		/// input.
		static const TargetRegisterClass *
		getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
		                           unsigned Opcode) {
		  // Initialize the index so it is never read uninitialized if the lookup
		  // below fails without writing it.
		  unsigned Index = 0;
		  unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
		      Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
		  // A zero opcode signals that the requested unfolding is not possible.
		  // Report that as a null class, which is what the caller checks for,
		  // instead of looking up the descriptor of opcode zero.
		  if (!UnfoldedOpc)
		    return nullptr;
		  const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
		  return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
		}

		/// Unfold the memory operand of every loading call or branch in \p MF.
		///
		/// Each near call, jump or tail call that loads its target from memory is
		/// replaced by the instructions produced by the generic unfold logic, with
		/// the loaded value placed in a fresh virtual register. Far calls and far
		/// jumps are left untouched. Any other loading call or branch, or a failure
		/// to compute the unfolded register class, is reported as a fatal error.
		void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
		    MachineFunction &MF) {
		  for (MachineBasicBlock &MBB : MF)
		    for (auto MII = MBB.instr_begin(), MIE = MBB.instr_end(); MII != MIE;) {
		      // Grab a reference and increment the iterator so we can remove this
		      // instruction if needed without disturbing the iteration.
		      MachineInstr &MI = *MII++;

		      // Must either be a call or a branch.
		      if (!MI.isCall() && !MI.isBranch())
		        continue;
		      // We only care about loading variants of these instructions.
		      if (!MI.mayLoad())
		        continue;

		      switch (MI.getOpcode()) {
		      default: {
		        LLVM_DEBUG(
		            dbgs() << "ERROR: Found an unexpected loading branch or call "
		                      "instruction:\n";
		            MI.dump(); dbgs() << "\n");
		        report_fatal_error("Unexpected loading branch or call!");
		      }

		      case X86::FARCALL16m:
		      case X86::FARCALL32m:
		      case X86::FARCALL64:
		      case X86::FARJMP16m:
		      case X86::FARJMP32m:
		      case X86::FARJMP64:
		        // We cannot mitigate far jumps or calls, but we also don't expect them
		        // to be vulnerable to Spectre v1.2 style attacks.
		        continue;

		      case X86::CALL16m:
		      case X86::CALL16m_NT:
		      case X86::CALL32m:
		      case X86::CALL32m_NT:
		      case X86::CALL64m:
		      case X86::CALL64m_NT:
		      case X86::JMP16m:
		      case X86::JMP16m_NT:
		      case X86::JMP32m:
		      case X86::JMP32m_NT:
		      case X86::JMP64m:
		      case X86::JMP64m_NT:
		      case X86::TAILJMPm64:
		      case X86::TAILJMPm64_REX:
		      case X86::TAILJMPm:
		      case X86::TCRETURNmi64:
		      case X86::TCRETURNmi: {
		        // Use the generic unfold logic now that we know we're dealing with
		        // expected instructions.
		        // FIXME: We don't have test coverage for all of these!
		        //
		        // TII is used as a pointer below, so it has to be dereferenced to
		        // bind to the helper's reference parameter.
		        auto UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
		        if (!UnfoldedRC) {
		          LLVM_DEBUG(dbgs()
		                         << "ERROR: Unable to unfold load from instruction:\n";
		                     MI.dump(); dbgs() << "\n");
		          report_fatal_error("Unable to unfold load!");
		        }
		        unsigned Reg = MRI->createVirtualRegister(UnfoldedRC);
		        SmallVector<MachineInstr *, 2> NewMIs;
		        // If we were able to compute an unfolded reg class, any failure here
		        // is just a programming error so just assert.
		        bool Unfolded =
		            TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
		                                     /*UnfoldStore*/ false, NewMIs);
		        (void)Unfolded;
		        assert(Unfolded &&
		               "Computed unfolded register class but failed to unfold");
		        // Now stitch the new instructions into place and erase the old one.
		        for (auto *NewMI : NewMIs)
		          MBB.insert(MI.getIterator(), NewMI);
		        MI.eraseFromParent();
		        LLVM_DEBUG({
		          dbgs() << "Unfolded load successfully into:\n";
		          for (auto *NewMI : NewMIs) {
		            NewMI->dump();
		            dbgs() << "\n";
		          }
		        });
		        continue;
		      }
		      }
		      llvm_unreachable("Escaped switch with default!");
		    }
		}

/// Returns true if the instruction has no behavior (specified or otherwise)		/// Returns true if the instruction has no behavior (specified or otherwise)
/// that is based on the value of any of its register operands		/// that is based on the value of any of its register operands
///		///
/// A classical example of something that is inherently not data invariant is an		/// A classical example of something that is inherently not data invariant is an
/// indirect jump -- the destination is loaded into icache based on the bits set		/// indirect jump -- the destination is loaded into icache based on the bits set
/// in the jump destination register.		/// in the jump destination register.
///		///
/// FIXME: This should become part of our instruction tables.		/// FIXME: This should become part of our instruction tables.
▲ Show 20 Lines • Show All 608 Lines • ▼ Show 20 Lines	for (MachineInstr &MI : MBB) {
unsigned HardenedReg = hardenPostLoad(MI);		unsigned HardenedReg = hardenPostLoad(MI);

// Mark the resulting hardened register as such so we don't re-harden.		// Mark the resulting hardened register as such so we don't re-harden.
AddrRegToHardenedReg[HardenedReg] = HardenedReg;		AddrRegToHardenedReg[HardenedReg] = HardenedReg;

continue;		continue;
}		}

		// Check for an indirect call or branch that may need its input hardened
		// even if we couldn't find the specific load used, or were able to avoid
		// hardening it for some reason. Note that here we cannot break out
		// afterward as we may still need to handle any call aspect of this
		// instruction.
		if ((MI.isCall() \|\| MI.isBranch()) && HardenIndirectCallsAndJumps)
		hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);

// After we finish processing the instruction and doing any hardening		// After we finish processing the instruction and doing any hardening
// necessary for it, we need to handle transferring the predicate state		// necessary for it, we need to handle transferring the predicate state
// into a call and recovering it after the call returns (if it returns).		// into a call and recovering it after the call returns (if it returns).
if (!MI.isCall())		if (!MI.isCall())
continue;		continue;

// If we're not hardening interprocedurally, we can just skip calls.		// If we're not hardening interprocedurally, we can just skip calls.
if (!HardenInterprocedurally)		if (!HardenInterprocedurally)
▲ Show 20 Lines • Show All 553 Lines • ▼ Show 20 Lines	void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
}		}

// Take our predicate state, shift it to the high 17 bits (so that we keep		// Take our predicate state, shift it to the high 17 bits (so that we keep
// pointers canonical) and merge it into RSP. This will allow the caller to		// pointers canonical) and merge it into RSP. This will allow the caller to
// extract it when we return (speculatively).		// extract it when we return (speculatively).
mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));		mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
}		}

		/// Harden the register target of an indirect call or jump.
		///
		/// Background: a speculative store by an attacker can overwrite a value that
		/// is subsequently speculatively loaded and used as the destination of an
		/// indirect call or jump. This is known as Spectre v1.2 or Bounds Check
		/// Bypass Store (BCBS); see:
		/// https://people.csail.mit.edu/vlk/spectre11.pdf
		///
		/// In that situation speculative execution gets steered to an address the
		/// attacker chose. Most of these loads are already hardened adequately, but
		/// they must definitively be treated as requiring post-load hardening:
		/// address hardening is enough to stop secret data leaking, yet may not be
		/// enough to stop an attacker steering speculative execution. All relevant
		/// loads were forcibly unfolded earlier, so there is always a chance to
		/// post-load harden here; this routine scans for targets not already flagged
		/// and adds them.
		///
		/// \param MI                   Call or branch instruction to inspect.
		/// \param AddrRegToHardenedReg Map from a register to its hardened
		///        counterpart; consulted for the target register and updated when a
		///        new hardened register is created.
		void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
		    MachineInstr &MI,
		    SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
		  // Far calls and far jumps need no hardening as they are safe from Spectre,
		  // so bail out on them before anything else.
		  const unsigned Opc = MI.getOpcode();
		  const bool IsFarTransfer =
		      Opc == X86::FARCALL16m || Opc == X86::FARCALL32m ||
		      Opc == X86::FARCALL64 || Opc == X86::FARJMP16m ||
		      Opc == X86::FARJMP32m || Opc == X86::FARJMP64;
		  if (IsFarTransfer)
		    return;

		  // Loading forms should all have been unfolded before we get here.
		  assert(!MI.mayLoad() && "Found a lingering loading instruction!");

		  // The target is always the first operand. When it is not a register the
		  // instruction takes an immediate target, which needs no hardening.
		  MachineOperand &Target = MI.getOperand(0);
		  if (!Target.isReg())
		    return;

		  const unsigned UnhardenedReg = Target.getReg();

		  // Hold a reference into the map so that a freshly computed hardened
		  // register is recorded for later lookups of the same register.
		  unsigned &CachedHardenedReg = AddrRegToHardenedReg[UnhardenedReg];

		  // Only compute a hardened register when none is known yet; otherwise the
		  // existing one is reused.
		  //
		  // FIXME: It is a little suspect that we use partially hardened registers that
		  // only feed addresses. The complexity of partial hardening with SHRX
		  // continues to pile up. Should definitively measure its value and consider
		  // eliminating it.
		  if (CachedHardenedReg == 0) {
		    CachedHardenedReg = hardenValueInRegister(
		        UnhardenedReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
		  }

		  // Redirect the instruction to the hardened register and count it.
		  Target.setReg(CachedHardenedReg);
		  ++NumCallsOrJumpsHardened;
		}

INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,		INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
"X86 speculative load hardener", false, false)		"X86 speculative load hardener", false, false)
INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,		INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, DEBUG_TYPE,
"X86 speculative load hardener", false, false)		"X86 speculative load hardener", false, false)

FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {		FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
return new X86SpeculativeLoadHardeningPass();		return new X86SpeculativeLoadHardeningPass();
}		}

llvm/trunk/test/CodeGen/X86/speculative-load-hardening-indirect.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening -data-sections \| FileCheck %s --check-prefix=X64			; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening -data-sections \| FileCheck %s --check-prefix=X64
				; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening -data-sections -mattr=+retpoline \| FileCheck %s --check-prefix=X64-RETPOLINE
	;			;
	; FIXME: Add support for 32-bit.			; FIXME: Add support for 32-bit.

	@global_fnptr = external global i32 ()*			@global_fnptr = external global i32 ()*

	@global_blockaddrs = constant [4 x i8*] [			@global_blockaddrs = constant [4 x i8*] [
	i8* blockaddress(@test_indirectbr_global, %bb0),			i8* blockaddress(@test_indirectbr_global, %bb0),
	i8* blockaddress(@test_indirectbr_global, %bb1),			i8* blockaddress(@test_indirectbr_global, %bb1),
	i8* blockaddress(@test_indirectbr_global, %bb2),			i8* blockaddress(@test_indirectbr_global, %bb2),
	i8* blockaddress(@test_indirectbr_global, %bb3)			i8* blockaddress(@test_indirectbr_global, %bb3)
	]			]

	define i32 @test_indirect_call(i32 ()** %ptr) nounwind {			define i32 @test_indirect_call(i32 ()** %ptr) nounwind {
	; X64-LABEL: test_indirect_call:			; X64-LABEL: test_indirect_call:
	; X64: # %bb.0: # %entry			; X64: # %bb.0: # %entry
	; X64-NEXT: pushq %rbx			; X64-NEXT: pushq %rax
	; X64-NEXT: movq %rsp, %rbx			; X64-NEXT: movq %rsp, %rax
	; X64-NEXT: movq $-1, %rax			; X64-NEXT: movq $-1, %rcx
	; X64-NEXT: sarq $63, %rbx			; X64-NEXT: sarq $63, %rax
	; X64-NEXT: orq %rbx, %rdi			; X64-NEXT: movq (%rdi), %rcx
	; X64-NEXT: callq *(%rdi)			; X64-NEXT: orq %rax, %rcx
	; X64-NEXT: shlq $47, %rbx			; X64-NEXT: shlq $47, %rax
	; X64-NEXT: orq %rbx, %rsp			; X64-NEXT: orq %rax, %rsp
	; X64-NEXT: popq %rbx			; X64-NEXT: callq *%rcx
				; X64-NEXT: movq %rsp, %rcx
				; X64-NEXT: sarq $63, %rcx
				; X64-NEXT: shlq $47, %rcx
				; X64-NEXT: orq %rcx, %rsp
				; X64-NEXT: popq %rcx
	; X64-NEXT: retq			; X64-NEXT: retq
				;
				; X64-RETPOLINE-LABEL: test_indirect_call:
				; X64-RETPOLINE: # %bb.0: # %entry
				; X64-RETPOLINE-NEXT: pushq %rax
				; X64-RETPOLINE-NEXT: movq %rsp, %rax
				; X64-RETPOLINE-NEXT: movq $-1, %rcx
				; X64-RETPOLINE-NEXT: sarq $63, %rax
				; X64-RETPOLINE-NEXT: movq (%rdi), %r11
				; X64-RETPOLINE-NEXT: orq %rax, %r11
				; X64-RETPOLINE-NEXT: shlq $47, %rax
				; X64-RETPOLINE-NEXT: orq %rax, %rsp
				; X64-RETPOLINE-NEXT: callq __llvm_retpoline_r11
				; X64-RETPOLINE-NEXT: movq %rsp, %rcx
				; X64-RETPOLINE-NEXT: sarq $63, %rcx
				; X64-RETPOLINE-NEXT: shlq $47, %rcx
				; X64-RETPOLINE-NEXT: orq %rcx, %rsp
				; X64-RETPOLINE-NEXT: popq %rcx
				; X64-RETPOLINE-NEXT: retq
	entry:			entry:
	%fp = load i32 (), i32 ()* %ptr			%fp = load i32 (), i32 ()* %ptr
	%v = call i32 %fp()			%v = call i32 %fp()
	ret i32 %v			ret i32 %v
	}			}

	define i32 @test_indirect_tail_call(i32 ()** %ptr) nounwind {			define i32 @test_indirect_tail_call(i32 ()** %ptr) nounwind {
	; X64-LABEL: test_indirect_tail_call:			; X64-LABEL: test_indirect_tail_call:
	; X64: # %bb.0: # %entry			; X64: # %bb.0: # %entry
	; X64-NEXT: movq %rsp, %rax			; X64-NEXT: movq %rsp, %rax
	; X64-NEXT: movq $-1, %rcx			; X64-NEXT: movq $-1, %rcx
	; X64-NEXT: sarq $63, %rax			; X64-NEXT: sarq $63, %rax
				; X64-NEXT: movq (%rdi), %rcx
				; X64-NEXT: orq %rax, %rcx
	; X64-NEXT: shlq $47, %rax			; X64-NEXT: shlq $47, %rax
	; X64-NEXT: orq %rax, %rsp			; X64-NEXT: orq %rax, %rsp
	; X64-NEXT: jmpq *(%rdi) # TAILCALL			; X64-NEXT: jmpq *%rcx # TAILCALL
				;
				; X64-RETPOLINE-LABEL: test_indirect_tail_call:
				; X64-RETPOLINE: # %bb.0: # %entry
				; X64-RETPOLINE-NEXT: movq %rsp, %rax
				; X64-RETPOLINE-NEXT: movq $-1, %rcx
				; X64-RETPOLINE-NEXT: sarq $63, %rax
				; X64-RETPOLINE-NEXT: movq (%rdi), %r11
				; X64-RETPOLINE-NEXT: orq %rax, %r11
				; X64-RETPOLINE-NEXT: shlq $47, %rax
				; X64-RETPOLINE-NEXT: orq %rax, %rsp
				; X64-RETPOLINE-NEXT: jmp __llvm_retpoline_r11 # TAILCALL
	entry:			entry:
	%fp = load i32 (), i32 ()* %ptr			%fp = load i32 (), i32 ()* %ptr
	%v = tail call i32 %fp()			%v = tail call i32 %fp()
	ret i32 %v			ret i32 %v
	}			}

	define i32 @test_indirect_call_global() nounwind {			define i32 @test_indirect_call_global() nounwind {
	; X64-LABEL: test_indirect_call_global:			; X64-LABEL: test_indirect_call_global:
	; X64: # %bb.0: # %entry			; X64: # %bb.0: # %entry
	; X64-NEXT: pushq %rax			; X64-NEXT: pushq %rax
	; X64-NEXT: movq %rsp, %rax			; X64-NEXT: movq %rsp, %rax
	; X64-NEXT: movq $-1, %rcx			; X64-NEXT: movq $-1, %rcx
	; X64-NEXT: sarq $63, %rax			; X64-NEXT: sarq $63, %rax
				; X64-NEXT: movq {{.*}}(%rip), %rcx
				; X64-NEXT: orq %rax, %rcx
	; X64-NEXT: shlq $47, %rax			; X64-NEXT: shlq $47, %rax
	; X64-NEXT: orq %rax, %rsp			; X64-NEXT: orq %rax, %rsp
	; X64-NEXT: callq {{.}}(%rip)			; X64-NEXT: callq *%rcx
	; X64-NEXT: movq %rsp, %rcx			; X64-NEXT: movq %rsp, %rcx
	; X64-NEXT: sarq $63, %rcx			; X64-NEXT: sarq $63, %rcx
	; X64-NEXT: shlq $47, %rcx			; X64-NEXT: shlq $47, %rcx
	; X64-NEXT: orq %rcx, %rsp			; X64-NEXT: orq %rcx, %rsp
	; X64-NEXT: popq %rcx			; X64-NEXT: popq %rcx
	; X64-NEXT: retq			; X64-NEXT: retq
				;
				; X64-RETPOLINE-LABEL: test_indirect_call_global:
				; X64-RETPOLINE: # %bb.0: # %entry
				; X64-RETPOLINE-NEXT: pushq %rax
				; X64-RETPOLINE-NEXT: movq %rsp, %rax
				; X64-RETPOLINE-NEXT: movq $-1, %rcx
				; X64-RETPOLINE-NEXT: sarq $63, %rax
				; X64-RETPOLINE-NEXT: movq {{.*}}(%rip), %r11
				; X64-RETPOLINE-NEXT: shlq $47, %rax
				; X64-RETPOLINE-NEXT: orq %rax, %rsp
				; X64-RETPOLINE-NEXT: callq __llvm_retpoline_r11
				; X64-RETPOLINE-NEXT: movq %rsp, %rcx
				; X64-RETPOLINE-NEXT: sarq $63, %rcx
				; X64-RETPOLINE-NEXT: shlq $47, %rcx
				; X64-RETPOLINE-NEXT: orq %rcx, %rsp
				; X64-RETPOLINE-NEXT: popq %rcx
				; X64-RETPOLINE-NEXT: retq
	entry:			entry:
	%fp = load i32 (), i32 ()* @global_fnptr			%fp = load i32 (), i32 ()* @global_fnptr
	%v = call i32 %fp()			%v = call i32 %fp()
	ret i32 %v			ret i32 %v
	}			}

	define i32 @test_indirect_tail_call_global() nounwind {			define i32 @test_indirect_tail_call_global() nounwind {
	; X64-LABEL: test_indirect_tail_call_global:			; X64-LABEL: test_indirect_tail_call_global:
	; X64: # %bb.0: # %entry			; X64: # %bb.0: # %entry
	; X64-NEXT: movq %rsp, %rax			; X64-NEXT: movq %rsp, %rax
	; X64-NEXT: movq $-1, %rcx			; X64-NEXT: movq $-1, %rcx
	; X64-NEXT: sarq $63, %rax			; X64-NEXT: sarq $63, %rax
				; X64-NEXT: movq {{.*}}(%rip), %rcx
				; X64-NEXT: orq %rax, %rcx
	; X64-NEXT: shlq $47, %rax			; X64-NEXT: shlq $47, %rax
	; X64-NEXT: orq %rax, %rsp			; X64-NEXT: orq %rax, %rsp
	; X64-NEXT: jmpq {{.}}(%rip) # TAILCALL			; X64-NEXT: jmpq *%rcx # TAILCALL
				;
				; X64-RETPOLINE-LABEL: test_indirect_tail_call_global:
				; X64-RETPOLINE: # %bb.0: # %entry
				; X64-RETPOLINE-NEXT: movq %rsp, %rax
				; X64-RETPOLINE-NEXT: movq $-1, %rcx
				; X64-RETPOLINE-NEXT: sarq $63, %rax
				; X64-RETPOLINE-NEXT: movq {{.*}}(%rip), %r11
				; X64-RETPOLINE-NEXT: shlq $47, %rax
				; X64-RETPOLINE-NEXT: orq %rax, %rsp
				; X64-RETPOLINE-NEXT: jmp __llvm_retpoline_r11 # TAILCALL
	entry:			entry:
	%fp = load i32 (), i32 ()* @global_fnptr			%fp = load i32 (), i32 ()* @global_fnptr
	%v = tail call i32 %fp()			%v = tail call i32 %fp()
	ret i32 %v			ret i32 %v
	}			}

	define i32 @test_indirectbr(i8** %ptr) nounwind {			define i32 @test_indirectbr(i8** %ptr) nounwind {
	; X64-LABEL: test_indirectbr:			; X64-LABEL: test_indirectbr:
	; X64: # %bb.0: # %entry			; X64: # %bb.0: # %entry
	; X64-NEXT: movq %rsp, %rcx			; X64-NEXT: movq %rsp, %rcx
	; X64-NEXT: movq $-1, %rax			; X64-NEXT: movq $-1, %rax
	; X64-NEXT: sarq $63, %rcx			; X64-NEXT: sarq $63, %rcx
	; X64-NEXT: orq %rcx, %rdi			; X64-NEXT: movq (%rdi), %rax
	; X64-NEXT: jmpq *(%rdi)			; X64-NEXT: orq %rcx, %rax
				; X64-NEXT: jmpq *%rax
	; X64-NEXT: .LBB4_1: # %bb0			; X64-NEXT: .LBB4_1: # %bb0
	; X64-NEXT: movl $2, %eax			; X64-NEXT: movl $2, %eax
	; X64-NEXT: jmp .LBB4_2			; X64-NEXT: jmp .LBB4_2
	; X64-NEXT: .LBB4_4: # %bb2			; X64-NEXT: .LBB4_4: # %bb2
	; X64-NEXT: movl $13, %eax			; X64-NEXT: movl $13, %eax
	; X64-NEXT: jmp .LBB4_2			; X64-NEXT: jmp .LBB4_2
	; X64-NEXT: .LBB4_5: # %bb3			; X64-NEXT: .LBB4_5: # %bb3
	; X64-NEXT: movl $42, %eax			; X64-NEXT: movl $42, %eax
	; X64-NEXT: jmp .LBB4_2			; X64-NEXT: jmp .LBB4_2
	; X64-NEXT: .LBB4_3: # %bb1			; X64-NEXT: .LBB4_3: # %bb1
	; X64-NEXT: movl $7, %eax			; X64-NEXT: movl $7, %eax
	; X64-NEXT: .LBB4_2: # %bb0			; X64-NEXT: .LBB4_2: # %bb0
	; X64-NEXT: shlq $47, %rcx			; X64-NEXT: shlq $47, %rcx
	; X64-NEXT: orq %rcx, %rsp			; X64-NEXT: orq %rcx, %rsp
	; X64-NEXT: retq			; X64-NEXT: retq
				;
				; X64-RETPOLINE-LABEL: test_indirectbr:
				; X64-RETPOLINE: # %bb.0: # %entry
	entry:			entry:
	%a = load i8, i8* %ptr			%a = load i8, i8* %ptr
	indirectbr i8* %a, [ label %bb0, label %bb1, label %bb2, label %bb3 ]			indirectbr i8* %a, [ label %bb0, label %bb1, label %bb2, label %bb3 ]

	bb0:			bb0:
	ret i32 2			ret i32 2

	bb1:			bb1:
	ret i32 7			ret i32 7

	bb2:			bb2:
	ret i32 13			ret i32 13

	bb3:			bb3:
	ret i32 42			ret i32 42
	}			}

	define i32 @test_indirectbr_global(i32 %idx) nounwind {			define i32 @test_indirectbr_global(i32 %idx) nounwind {
	; X64-LABEL: test_indirectbr_global:			; X64-LABEL: test_indirectbr_global:
	; X64: # %bb.0: # %entry			; X64: # %bb.0: # %entry
	; X64-NEXT: movq %rsp, %rcx			; X64-NEXT: movq %rsp, %rcx
	; X64-NEXT: movq $-1, %rax			; X64-NEXT: movq $-1, %rax
	; X64-NEXT: sarq $63, %rcx			; X64-NEXT: sarq $63, %rcx
	; X64-NEXT: movslq %edi, %rax			; X64-NEXT: movslq %edi, %rax
				; X64-NEXT: movq global_blockaddrs(,%rax,8), %rax
	; X64-NEXT: orq %rcx, %rax			; X64-NEXT: orq %rcx, %rax
	; X64-NEXT: jmpq *global_blockaddrs(,%rax,8)			; X64-NEXT: jmpq *%rax
	; X64-NEXT: .Ltmp0: # Block address taken			; X64-NEXT: .Ltmp0: # Block address taken
	; X64-NEXT: .LBB5_1: # %bb0			; X64-NEXT: .LBB5_1: # %bb0
	; X64-NEXT: movl $2, %eax			; X64-NEXT: movl $2, %eax
	; X64-NEXT: jmp .LBB5_2			; X64-NEXT: jmp .LBB5_2
	; X64-NEXT: .Ltmp1: # Block address taken			; X64-NEXT: .Ltmp1: # Block address taken
	; X64-NEXT: .LBB5_4: # %bb2			; X64-NEXT: .LBB5_4: # %bb2
	; X64-NEXT: movl $13, %eax			; X64-NEXT: movl $13, %eax
	; X64-NEXT: jmp .LBB5_2			; X64-NEXT: jmp .LBB5_2
	; X64-NEXT: .Ltmp2: # Block address taken			; X64-NEXT: .Ltmp2: # Block address taken
	; X64-NEXT: .LBB5_5: # %bb3			; X64-NEXT: .LBB5_5: # %bb3
	; X64-NEXT: movl $42, %eax			; X64-NEXT: movl $42, %eax
	; X64-NEXT: jmp .LBB5_2			; X64-NEXT: jmp .LBB5_2
	; X64-NEXT: .Ltmp3: # Block address taken			; X64-NEXT: .Ltmp3: # Block address taken
	; X64-NEXT: .LBB5_3: # %bb1			; X64-NEXT: .LBB5_3: # %bb1
	; X64-NEXT: movl $7, %eax			; X64-NEXT: movl $7, %eax
	; X64-NEXT: .LBB5_2: # %bb0			; X64-NEXT: .LBB5_2: # %bb0
	; X64-NEXT: shlq $47, %rcx			; X64-NEXT: shlq $47, %rcx
	; X64-NEXT: orq %rcx, %rsp			; X64-NEXT: orq %rcx, %rsp
	; X64-NEXT: retq			; X64-NEXT: retq
				;
				; X64-RETPOLINE-LABEL: test_indirectbr_global:
				; X64-RETPOLINE: # %bb.0: # %entry
				; X64-RETPOLINE-NEXT: movq %rsp, %rcx
				; X64-RETPOLINE-NEXT: movq $-1, %rax
				; X64-RETPOLINE-NEXT: sarq $63, %rcx
				; X64-RETPOLINE-NEXT: movslq %edi, %rdx
				; X64-RETPOLINE-NEXT: movq global_blockaddrs(,%rdx,8), %rdx
				; X64-RETPOLINE-NEXT: orq %rcx, %rdx
				; X64-RETPOLINE-NEXT: cmpq $2, %rdx
				; X64-RETPOLINE-NEXT: je .LBB6_5
				; X64-RETPOLINE-NEXT: # %bb.1: # %entry
				; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx
				; X64-RETPOLINE-NEXT: cmpq $3, %rdx
				; X64-RETPOLINE-NEXT: je .LBB6_6
				; X64-RETPOLINE-NEXT: # %bb.2: # %entry
				; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx
				; X64-RETPOLINE-NEXT: cmpq $4, %rdx
				; X64-RETPOLINE-NEXT: jne .LBB6_3
				; X64-RETPOLINE-NEXT: .Ltmp0: # Block address taken
				; X64-RETPOLINE-NEXT: # %bb.7: # %bb3
				; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx
				; X64-RETPOLINE-NEXT: movl $42, %eax
				; X64-RETPOLINE-NEXT: jmp .LBB6_4
				; X64-RETPOLINE-NEXT: .Ltmp1: # Block address taken
				; X64-RETPOLINE-NEXT: .LBB6_5: # %bb1
				; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx
				; X64-RETPOLINE-NEXT: movl $7, %eax
				; X64-RETPOLINE-NEXT: jmp .LBB6_4
				; X64-RETPOLINE-NEXT: .Ltmp2: # Block address taken
				; X64-RETPOLINE-NEXT: .LBB6_6: # %bb2
				; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx
				; X64-RETPOLINE-NEXT: movl $13, %eax
				; X64-RETPOLINE-NEXT: jmp .LBB6_4
				; X64-RETPOLINE-NEXT: .Ltmp3: # Block address taken
				; X64-RETPOLINE-NEXT: .LBB6_3: # %bb0
				; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx
				; X64-RETPOLINE-NEXT: movl $2, %eax
				; X64-RETPOLINE-NEXT: .LBB6_4: # %bb0
				; X64-RETPOLINE-NEXT: shlq $47, %rcx
				; X64-RETPOLINE-NEXT: orq %rcx, %rsp
				; X64-RETPOLINE-NEXT: retq
	entry:			entry:
	%ptr = getelementptr [4 x i8], [4 x i8]* @global_blockaddrs, i32 0, i32 %idx			%ptr = getelementptr [4 x i8], [4 x i8]* @global_blockaddrs, i32 0, i32 %idx
	%a = load i8, i8* %ptr			%a = load i8, i8* %ptr
	indirectbr i8* %a, [ label %bb0, label %bb1, label %bb2, label %bb3 ]			indirectbr i8* %a, [ label %bb0, label %bb1, label %bb2, label %bb3 ]

	bb0:			bb0:
	ret i32 2			ret i32 2

	Show All 15 Lines
	; X64-NEXT: movq %rsp, %rcx			; X64-NEXT: movq %rsp, %rcx
	; X64-NEXT: movq $-1, %rax			; X64-NEXT: movq $-1, %rax
	; X64-NEXT: sarq $63, %rcx			; X64-NEXT: sarq $63, %rcx
	; X64-NEXT: cmpl $3, %edi			; X64-NEXT: cmpl $3, %edi
	; X64-NEXT: ja .LBB6_2			; X64-NEXT: ja .LBB6_2
	; X64-NEXT: # %bb.1: # %entry			; X64-NEXT: # %bb.1: # %entry
	; X64-NEXT: cmovaq %rax, %rcx			; X64-NEXT: cmovaq %rax, %rcx
	; X64-NEXT: movl %edi, %eax			; X64-NEXT: movl %edi, %eax
				; X64-NEXT: movq .LJTI6_0(,%rax,8), %rax
	; X64-NEXT: orq %rcx, %rax			; X64-NEXT: orq %rcx, %rax
	; X64-NEXT: jmpq *.LJTI6_0(,%rax,8)			; X64-NEXT: jmpq *%rax
	; X64-NEXT: .LBB6_3: # %bb1			; X64-NEXT: .LBB6_3: # %bb1
	; X64-NEXT: movl $7, %eax			; X64-NEXT: movl $7, %eax
	; X64-NEXT: jmp .LBB6_4			; X64-NEXT: jmp .LBB6_4
	; X64-NEXT: .LBB6_2: # %bb0			; X64-NEXT: .LBB6_2: # %bb0
	; X64-NEXT: cmovbeq %rax, %rcx			; X64-NEXT: cmovbeq %rax, %rcx
	; X64-NEXT: movl $2, %eax			; X64-NEXT: movl $2, %eax
	; X64-NEXT: jmp .LBB6_4			; X64-NEXT: jmp .LBB6_4
	; X64-NEXT: .LBB6_5: # %bb2			; X64-NEXT: .LBB6_5: # %bb2
	; X64-NEXT: movl $13, %eax			; X64-NEXT: movl $13, %eax
	; X64-NEXT: jmp .LBB6_4			; X64-NEXT: jmp .LBB6_4
	; X64-NEXT: .LBB6_6: # %bb3			; X64-NEXT: .LBB6_6: # %bb3
	; X64-NEXT: movl $42, %eax			; X64-NEXT: movl $42, %eax
	; X64-NEXT: jmp .LBB6_4			; X64-NEXT: jmp .LBB6_4
	; X64-NEXT: .LBB6_7: # %bb5			; X64-NEXT: .LBB6_7: # %bb5
	; X64-NEXT: movl $11, %eax			; X64-NEXT: movl $11, %eax
	; X64-NEXT: .LBB6_4: # %bb1			; X64-NEXT: .LBB6_4: # %bb1
	; X64-NEXT: shlq $47, %rcx			; X64-NEXT: shlq $47, %rcx
	; X64-NEXT: orq %rcx, %rsp			; X64-NEXT: orq %rcx, %rsp
	; X64-NEXT: retq			; X64-NEXT: retq
				;
				; X64-RETPOLINE-LABEL: test_switch_jumptable:
				; X64-RETPOLINE: # %bb.0: # %entry
				; X64-RETPOLINE-NEXT: movq %rsp, %rcx
				; X64-RETPOLINE-NEXT: movq $-1, %rax
				; X64-RETPOLINE-NEXT: sarq $63, %rcx
				; X64-RETPOLINE-NEXT: cmpl $1, %edi
				; X64-RETPOLINE-NEXT: jg .LBB7_4
				; X64-RETPOLINE-NEXT: # %bb.1: # %entry
				; X64-RETPOLINE-NEXT: cmovgq %rax, %rcx
				; X64-RETPOLINE-NEXT: testl %edi, %edi
				; X64-RETPOLINE-NEXT: je .LBB7_8
				; X64-RETPOLINE-NEXT: # %bb.2: # %entry
				; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx
				; X64-RETPOLINE-NEXT: cmpl $1, %edi
				; X64-RETPOLINE-NEXT: jne .LBB7_6
				; X64-RETPOLINE-NEXT: # %bb.3: # %bb2
				; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx
				; X64-RETPOLINE-NEXT: movl $13, %eax
				; X64-RETPOLINE-NEXT: jmp .LBB7_7
				; X64-RETPOLINE-NEXT: .LBB7_4: # %entry
				; X64-RETPOLINE-NEXT: cmovleq %rax, %rcx
				; X64-RETPOLINE-NEXT: cmpl $2, %edi
				; X64-RETPOLINE-NEXT: je .LBB7_9
				; X64-RETPOLINE-NEXT: # %bb.5: # %entry
				; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx
				; X64-RETPOLINE-NEXT: cmpl $3, %edi
				; X64-RETPOLINE-NEXT: jne .LBB7_6
				; X64-RETPOLINE-NEXT: # %bb.10: # %bb5
				; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx
				; X64-RETPOLINE-NEXT: movl $11, %eax
				; X64-RETPOLINE-NEXT: jmp .LBB7_7
				; X64-RETPOLINE-NEXT: .LBB7_6:
				; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx
				; X64-RETPOLINE-NEXT: movl $2, %eax
				; X64-RETPOLINE-NEXT: jmp .LBB7_7
				; X64-RETPOLINE-NEXT: .LBB7_8: # %bb1
				; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx
				; X64-RETPOLINE-NEXT: movl $7, %eax
				; X64-RETPOLINE-NEXT: jmp .LBB7_7
				; X64-RETPOLINE-NEXT: .LBB7_9: # %bb3
				; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx
				; X64-RETPOLINE-NEXT: movl $42, %eax
				; X64-RETPOLINE-NEXT: .LBB7_7: # %bb0
				; X64-RETPOLINE-NEXT: shlq $47, %rcx
				; X64-RETPOLINE-NEXT: orq %rcx, %rsp
				; X64-RETPOLINE-NEXT: retq
	entry:			entry:
	switch i32 %idx, label %bb0 [			switch i32 %idx, label %bb0 [
	i32 0, label %bb1			i32 0, label %bb1
	i32 1, label %bb2			i32 1, label %bb2
	i32 2, label %bb3			i32 2, label %bb3
	i32 3, label %bb5			i32 3, label %bb5
	]			]

	Show All 15 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[x86/SLH] Teach the x86 speculative load hardening pass to harden against v1.2 BCBS attacks directly.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 157184

llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp

llvm/trunk/test/CodeGen/X86/speculative-load-hardening-indirect.ll

This is an archive of the discontinued LLVM Phabricator instance.

[x86/SLH] Teach the x86 speculative load hardening pass to harden against v1.2 BCBS attacks directly. ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 157184

llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp

llvm/trunk/test/CodeGen/X86/speculative-load-hardening-indirect.ll

[x86/SLH] Teach the x86 speculative load hardening pass to harden against v1.2 BCBS attacks directly.
ClosedPublic