diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -62,6 +62,7 @@
   X86SelectionDAGInfo.cpp
   X86ShuffleDecodeConstantPool.cpp
   X86SpeculativeLoadHardening.cpp
+  X86SpeculativeExecutionSideEffectSuppression.cpp
   X86Subtarget.cpp
   X86TargetMachine.cpp
   X86TargetObjectFile.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -138,6 +138,7 @@
                                       X86RegisterBankInfo &);
 FunctionPass *createX86SpeculativeLoadHardeningPass();
+FunctionPass *createX86SpeculativeExecutionSideEffectSuppressionPass();

 void initializeEvexToVexInstPassPass(PassRegistry &);
 void initializeFixupBWInstPassPass(PassRegistry &);
@@ -154,6 +155,7 @@
 void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
 void initializeX86OptimizeLEAPassPass(PassRegistry &);
 void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
+void initializeX86SpeculativeExecutionSideEffectSuppressionPassPass(PassRegistry &);

 namespace X86AS {
 enum : unsigned {
diff --git a/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
@@ -0,0 +1,208 @@
+//===-- X86SpeculativeExecutionSideEffectSuppression.cpp ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file contains the X86 implementation of the speculative execution side
+/// effect suppression mitigation.
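+///
+/// The pass works by inserting an LFENCE before every instruction that may
+/// load or store, and before the terminator group of any basic block that
+/// ends in a branch, suppressing the side effects of misspeculated execution.
+/// Several hidden flags (see the cl::opt declarations below) relax this
+/// placement policy to trade protection for performance.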
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-seses"
+
+STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
+
+static cl::opt<bool> EnableSpeculativeExecutionSideEffectSuppression(
+    "x86-seses-enable",
+    cl::desc("Force enable speculative execution side effect suppression"),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> OnlyFirstLFENCE(
+    "x86-seses-only-first-lfence",
+    cl::desc(
+        "Omit all lfences other than the first to be placed in a basic block."),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> OnlyLFENCENonConst(
+    "x86-seses-only-lfence-non-const",
+    cl::desc("Only lfence before groups of terminators where at least one "
+             "branch instruction has an input to the addressing mode that is a "
+             "register other than %rip."),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+    OmitBranchLFENCEs("x86-seses-omit-branch-lfences",
+                      cl::desc("Omit all lfences before branch instructions."),
+                      cl::init(false), cl::Hidden);
+
+static cl::opt<bool> OmitLFENCEInBasicBlocksWithoutLoads(
+    "x86-seses-omit-lfence-in-bb-without-loads",
+    cl::desc("Omit the LFENCE in basic blocks without any loads, even if they "
+             "contain stores."),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> OmitLFENCEInBasicBlocksWithOneLoadAndNoStores(
+    "x86-seses-omit-lfence-in-bb-with-one-load-no-stores",
+    cl::desc("Don't LFENCE in basic blocks with one load and no stores."),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> LFENCEDataInvariantInstructions(
+    "x86-seses-lfence-data-invariant-inst",
+    cl::desc("LFENCE before instructions that are data invariant."),
+    cl::init(true), cl::Hidden);
+
+static bool hasConstantAddressingMode(const MachineInstr &MI);
+
+namespace {
+
+class X86SpeculativeExecutionSideEffectSuppressionPass
+    : public MachineFunctionPass {
+public:
+  X86SpeculativeExecutionSideEffectSuppressionPass()
+      : MachineFunctionPass(ID) {}
+
+  static char ID;
+  StringRef getPassName() const override {
+    return "X86 Speculative Execution Side Effect Suppression Pass";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace
+
+char X86SpeculativeExecutionSideEffectSuppressionPass::ID = 0;
+
+bool X86SpeculativeExecutionSideEffectSuppressionPass::runOnMachineFunction(
+    MachineFunction &MF) {
+  if (!EnableSpeculativeExecutionSideEffectSuppression)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
+                    << " **********\n");
+  bool Modified = false;
+  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+  const X86InstrInfo *TII = Subtarget.getInstrInfo();
+  for (MachineBasicBlock &MBB : MF) {
+    // First take a pass over the basic block to see whether its LFENCEs can
+    // be skipped entirely based on the number of loads and stores it contains.
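+    // For example (illustrative): with -x86-seses-omit-lfence-in-bb-without-loads
+    // a store-only block such as "movl $10, -4(%rsp); retq" keeps no LFENCE at
+    // all, as exercised by the omit-lfence-in-bb-without-loads test below.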
+    if (OmitLFENCEInBasicBlocksWithoutLoads ||
+        OmitLFENCEInBasicBlocksWithOneLoadAndNoStores) {
+      int LoadCount = 0;
+      int StoreCount = 0;
+
+      for (const MachineInstr &MI : MBB) {
+        if (MI.mayLoad())
+          LoadCount++;
+        if (MI.mayStore())
+          StoreCount++;
+      }
+
+      if (OmitLFENCEInBasicBlocksWithoutLoads && LoadCount == 0)
+        continue;
+
+      if (OmitLFENCEInBasicBlocksWithOneLoadAndNoStores && StoreCount == 0 &&
+          LoadCount <= 1)
+        continue;
+    }
+
+    MachineInstr *FirstTerminator = nullptr;
+
+    for (auto &MI : MBB) {
+      // If the current instruction is data invariant and we are not LFENCEing
+      // data invariant instructions, then continue to the next instruction.
+      if (!LFENCEDataInvariantInstructions &&
+          (TII->isDataInvariant(MI) || TII->isDataInvariantLoad(MI)))
+        continue;
+
+      // Put an LFENCE before any instruction that may load or store. This
+      // LFENCE is intended to avoid leaking any secret data due to a given
+      // load or store, closing the cache and memory timing side channels.
+      // Terminators that load or store are treated separately below.
+      if (MI.mayLoadOrStore() && !MI.isTerminator()) {
+        BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
+        NumLFENCEsInserted++;
+        Modified = true;
+        if (OnlyFirstLFENCE)
+          break;
+      }
+
+      // The rest of this loop body places an LFENCE before groups of
+      // terminators that include branches. This closes the branch prediction
+      // side channels, since the LFENCE prevents code after a misspeculated
+      // branch from executing.
+
+      // Keep track of the first terminator in a basic block since if we need
+      // to LFENCE the terminators in this basic block we must add the
+      // instruction before the first terminator in the basic block (as
+      // opposed to before the terminator that indicates an LFENCE is
+      // required). An example of why this is necessary is that the
+      // X86InstrInfo::analyzeBranch method assumes all terminators are grouped
+      // together and terminates its analysis once the first non-terminator
+      // instruction is found.
+      if (MI.isTerminator() && FirstTerminator == nullptr)
+        FirstTerminator = &MI;
+
+      // Look for branch instructions that will require an LFENCE to be put
+      // before this basic block's terminators.
+      if (!MI.isBranch() || OmitBranchLFENCEs) {
+        // This isn't a branch or we're not putting LFENCEs before branches.
+        continue;
+      }
+
+      if (OnlyLFENCENonConst && hasConstantAddressingMode(MI)) {
+        // This is a branch, but it only has a constant addressing mode and
+        // we're not adding LFENCEs before such branches.
+        continue;
+      }
+
+      // This branch requires adding an LFENCE.
+      BuildMI(MBB, FirstTerminator, DebugLoc(), TII->get(X86::LFENCE));
+      NumLFENCEsInserted++;
+      Modified = true;
+      break;
+    }
+  }
+
+  return Modified;
+}
+
+// This function returns whether the passed instruction uses a memory
+// addressing mode that is constant. We treat all memory addressing modes that
+// read from a register other than %rip as non-constant. Note that a use of
+// the EFLAGS register results in an addressing mode being considered
+// non-constant, therefore all JCC instructions will return false from this
+// function since one of their operands will always be the EFLAGS register.
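+// For example (illustrative): a direct "jmp .LBB0_2" has no register uses and
+// so is treated as constant, a register-indirect "jmp *%rax" is non-constant,
+// and a conditional "jne .LBB0_2" is non-constant because of its implicit
+// EFLAGS use.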
+static bool hasConstantAddressingMode(const MachineInstr &MI) {
+  for (const MachineOperand &MO : MI.uses())
+    if (MO.isReg() && X86::RIP != MO.getReg())
+      return false;
+  return true;
+}
+
+FunctionPass *llvm::createX86SpeculativeExecutionSideEffectSuppressionPass() {
+  return new X86SpeculativeExecutionSideEffectSuppressionPass();
+}
+
+INITIALIZE_PASS(X86SpeculativeExecutionSideEffectSuppressionPass, "x86-seses",
+                "X86 Speculative Execution Side Effect Suppression Pass",
+                false, false)
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -80,6 +80,7 @@
   initializeX86DomainReassignmentPass(PR);
   initializeX86AvoidSFBPassPass(PR);
   initializeX86SpeculativeLoadHardeningPassPass(PR);
+  initializeX86SpeculativeExecutionSideEffectSuppressionPassPass(PR);
   initializeX86FlagsCopyLoweringPassPass(PR);
   initializeX86CondBrFoldingPassPass(PR);
   initializeX86OptimizeLEAPassPass(PR);
@@ -526,6 +527,16 @@
   const Triple &TT = TM->getTargetTriple();
   const MCAsmInfo *MAI = TM->getMCAsmInfo();

+  // The X86 Speculative Execution Side Effect Suppression pass must run after
+  // all passes that modify the control flow graph: until the LLVM model of
+  // LFENCE is updated (FIXME: update the model of LFENCE), later passes could
+  // otherwise move code around the inserted LFENCEs in a way that defeats the
+  // mitigation. It is therefore scheduled immediately before the X86 Retpoline
+  // Thunks pass, a placement that has been verified by hand inspection of the
+  // generated code to ensure that subsequent passes do not reorder
+  // instructions across the LFENCEs.
+  addPass(createX86SpeculativeExecutionSideEffectSuppressionPass());
   addPass(createX86RetpolineThunksPass());

   // Insert extra int3 instructions after trailing call instructions to avoid
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -72,6 +72,7 @@
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
+; CHECK-NEXT:       X86 Speculative Execution Side Effect Suppression Pass
 ; CHECK-NEXT:       X86 Retpoline Thunks
 ; CHECK-NEXT:       Check CFA info and insert CFI instructions if needed
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll
--- a/llvm/test/CodeGen/X86/O3-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -181,6 +181,7 @@
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
+; CHECK-NEXT:       X86 Speculative Execution Side Effect Suppression Pass
 ; CHECK-NEXT:       X86 Retpoline Thunks
 ; CHECK-NEXT:       Check CFA info and insert CFI instructions if needed
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
diff --git a/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-branch-lfences.ll b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-branch-lfences.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-branch-lfences.ll
@@ -0,0 +1,160 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-omit-branch-lfences %s -o - | FileCheck %s

+define dso_local void @_Z4buzzv() {
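+; _Z4buzzv only stores; even with branch LFENCEs omitted, the store itself is
+; still preceded by an lfence (see the CHECK lines below).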
+entry: + %a = alloca i32, align 4 + store i32 10, i32* %a, align 4 + ret void +} + +define dso_local i32 @_Z3barPi(i32* %p) { +entry: + %retval = alloca i32, align 4 + %p.addr = alloca i32*, align 8 + %a = alloca [4 x i32], align 16 + %len = alloca i32, align 4 + store i32* %p, i32** %p.addr, align 8 + %0 = bitcast [4 x i32]* %a to i8* + store i32 4, i32* %len, align 4 + %1 = load i32*, i32** %p.addr, align 8 + %2 = load i32, i32* %1, align 4 + %3 = load i32, i32* %len, align 4 + %cmp = icmp slt i32 %2, %3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %4 = load i32*, i32** %p.addr, align 8 + %5 = load i32, i32* %4, align 4 + %idxprom = sext i32 %5 to i64 + %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 %idxprom + %6 = load i32, i32* %arrayidx, align 4 + store i32 %6, i32* %retval, align 4 + br label %return + +if.else: ; preds = %entry + store i32 -1, i32* %retval, align 4 + br label %return + +return: ; preds = %if.else, %if.then + %7 = load i32, i32* %retval, align 4 + ret i32 %7 +} + +define dso_local i32 (i32*)* @_Z3bazv() { +entry: + %p = alloca i32 (i32*)*, align 8 + store i32 (i32*)* @_Z3barPi, i32 (i32*)** %p, align 8 + call void asm sideeffect "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32 (i32*)** %p, i32 (i32*)** %p) #3, !srcloc !2 + %0 = load i32 (i32*)*, i32 (i32*)** %p, align 8 + ret i32 (i32*)* %0 +} + +define dso_local void @_Z3fooPi(i32* %p) { +entry: + %p.addr = alloca i32*, align 8 + %t = alloca i32 (i32*)*, align 8 + store i32* %p, i32** %p.addr, align 8 + %call = call i32 (i32*)* @_Z3bazv() + store i32 (i32*)* %call, i32 (i32*)** %t, align 8 + %0 = load i32 (i32*)*, i32 (i32*)** %t, align 8 + %1 = load i32*, i32** %p.addr, align 8 + %call1 = call i32 %0(i32* %1) + ret void +} + +!2 = !{i32 233} + +; CHECK: .globl _Z4buzzv # -- Begin function _Z4buzzv +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z4buzzv,@function +; CHECK:_Z4buzzv: # @_Z4buzzv +; CHECK:.L_Z4buzzv$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movl $10, -4(%rsp) +; CHECK: retq +; CHECK:.Lfunc_end0: +; CHECK: .size _Z4buzzv, .Lfunc_end0-_Z4buzzv +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3barPi # -- Begin function _Z3barPi +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3barPi,@function +; CHECK:_Z3barPi: # @_Z3barPi +; CHECK:.L_Z3barPi$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movq %rdi, -40(%rsp) +; CHECK: lfence +; CHECK: movl $4, -28(%rsp) +; CHECK: lfence +; CHECK: cmpl $3, (%rdi) +; CHECK: jg .LBB1_2 +; CHECK:# %bb.1: # %if.then +; CHECK: lfence +; CHECK: movq -40(%rsp), %rax +; CHECK: lfence +; CHECK: movslq (%rax), %rax +; CHECK: lfence +; CHECK: movl -24(%rsp,%rax,4), %eax +; CHECK: lfence +; CHECK: movl %eax, -44(%rsp) +; CHECK: lfence +; CHECK: movl -44(%rsp), %eax +; CHECK: retq +; CHECK:.LBB1_2: # %if.else +; CHECK: lfence +; CHECK: movl $-1, -44(%rsp) +; CHECK: lfence +; CHECK: movl -44(%rsp), %eax +; CHECK: retq +; CHECK:.Lfunc_end1: +; CHECK: .size _Z3barPi, .Lfunc_end1-_Z3barPi +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3bazv # -- Begin function _Z3bazv +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3bazv,@function +; CHECK:_Z3bazv: # @_Z3bazv +; CHECK:.L_Z3bazv$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movq $.L_Z3barPi$local, -8(%rsp) +; CHECK: lfence +; CHECK: #APP +; CHECK: #NO_APP +; CHECK: lfence +; CHECK: movq -8(%rsp), %rax +; CHECK: retq +; 
CHECK:.Lfunc_end2: +; CHECK: .size _Z3bazv, .Lfunc_end2-_Z3bazv +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3fooPi # -- Begin function _Z3fooPi +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3fooPi,@function +; CHECK:_Z3fooPi: # @_Z3fooPi +; CHECK:.L_Z3fooPi$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: subq $24, %rsp +; CHECK: .cfi_def_cfa_offset 32 +; CHECK: lfence +; CHECK: movq %rdi, 8(%rsp) +; CHECK: callq .L_Z3bazv$local +; CHECK: lfence +; CHECK: movq %rax, 16(%rsp) +; CHECK: lfence +; CHECK: movq 8(%rsp), %rdi +; CHECK: callq *%rax +; CHECK: addq $24, %rsp +; CHECK: .cfi_def_cfa_offset 8 +; CHECK: retq +; CHECK:.Lfunc_end3: +; CHECK: .size _Z3fooPi, .Lfunc_end3-_Z3fooPi +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .section ".note.GNU-stack","",@progbits diff --git a/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll @@ -0,0 +1,83 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-omit-lfence-in-bb-with-one-load-no-stores %s -o - | FileCheck %s --check-prefix=CHECK-FLAGGED +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable %s -o - | FileCheck %s --check-prefix=CHECK-FULL + +define dso_local void @_Z3fooPi(i32* %p) #0 { +entry: + %p.addr = alloca i32*, align 8 + %a = alloca i32, align 4 + store i32* %p, i32** %p.addr, align 8 + %0 = load i32*, i32** %p.addr, align 8 + %1 = load i32, i32* %0, align 4 + %cmp = icmp eq i32 %1, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %2 = load i32*, i32** %p.addr, align 8 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +; CHECK-FLAGGED: .globl _Z3fooPi # -- Begin function _Z3fooPi +; CHECK-FLAGGED: .p2align 4, 0x90 +; CHECK-FLAGGED: .type _Z3fooPi,@function +; CHECK-FLAGGED:_Z3fooPi: # @_Z3fooPi +; CHECK-FLAGGED:.L_Z3fooPi$local: +; CHECK-FLAGGED: .cfi_startproc +; CHECK-FLAGGED:# %bb.0: # %entry +; CHECK-FLAGGED: lfence +; CHECK-FLAGGED: pushq %rbp +; CHECK-FLAGGED: .cfi_def_cfa_offset 16 +; CHECK-FLAGGED: .cfi_offset %rbp, -16 +; CHECK-FLAGGED: movq %rsp, %rbp +; CHECK-FLAGGED: .cfi_def_cfa_register %rbp +; CHECK-FLAGGED: lfence +; CHECK-FLAGGED: movq %rdi, -8(%rbp) +; CHECK-FLAGGED: lfence +; CHECK-FLAGGED: movq -8(%rbp), %rax +; CHECK-FLAGGED: lfence +; CHECK-FLAGGED: cmpl $0, (%rax) +; CHECK-FLAGGED: lfence +; CHECK-FLAGGED: jne .LBB0_2 +; CHECK-FLAGGED:# %bb.1: # %if.then +; CHECK-FLAGGED:.LBB0_2: # %if.end +; CHECK-FLAGGED: popq %rbp +; CHECK-FLAGGED: .cfi_def_cfa %rsp, 8 +; CHECK-FLAGGED: retq +; CHECK-FLAGGED:.Lfunc_end0: +; CHECK-FLAGGED: .size _Z3fooPi, .Lfunc_end0-_Z3fooPi +; CHECK-FLAGGED: 
.cfi_endproc + +; CHECK-FULL: .globl _Z3fooPi # -- Begin function _Z3fooPi +; CHECK-FULL: .p2align 4, 0x90 +; CHECK-FULL: .type _Z3fooPi,@function +; CHECK-FULL:_Z3fooPi: # @_Z3fooPi +; CHECK-FULL:.L_Z3fooPi$local: +; CHECK-FULL: .cfi_startproc +; CHECK-FULL:# %bb.0: # %entry +; CHECK-FULL: lfence +; CHECK-FULL: pushq %rbp +; CHECK-FULL: .cfi_def_cfa_offset 16 +; CHECK-FULL: .cfi_offset %rbp, -16 +; CHECK-FULL: movq %rsp, %rbp +; CHECK-FULL: .cfi_def_cfa_register %rbp +; CHECK-FULL: lfence +; CHECK-FULL: movq %rdi, -8(%rbp) +; CHECK-FULL: lfence +; CHECK-FULL: movq -8(%rbp), %rax +; CHECK-FULL: lfence +; CHECK-FULL: cmpl $0, (%rax) +; CHECK-FULL: lfence +; CHECK-FULL: jne .LBB0_2 +; CHECK-FULL:# %bb.1: # %if.then +; CHECK-FULL:.LBB0_2: # %if.end +; CHECK-FULL: lfence +; CHECK-FULL: popq %rbp +; CHECK-FULL: .cfi_def_cfa %rsp, 8 +; CHECK-FULL: retq +; CHECK-FULL:.Lfunc_end0: +; CHECK-FULL: .size _Z3fooPi, .Lfunc_end0-_Z3fooPi +; CHECK-FULL: .cfi_endproc diff --git a/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-without-loads.ll b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-without-loads.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-without-loads.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-omit-lfence-in-bb-without-loads %s -o - | FileCheck %s --check-prefix=CHECK-FLAGGED +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable %s -o - | FileCheck %s --check-prefix=CHECK-FULL + +define dso_local void @_Z4buzzv() { +entry: + %a = alloca i32, align 4 + store i32 10, i32* %a, align 4 + ret void +} + +; CHECK-FLAGGED: .globl _Z4buzzv # -- Begin function _Z4buzzv +; CHECK-FLAGGED: .p2align 4, 0x90 +; CHECK-FLAGGED: .type _Z4buzzv,@function +; CHECK-FLAGGED:_Z4buzzv: # @_Z4buzzv +; CHECK-FLAGGED:.L_Z4buzzv$local: +; CHECK-FLAGGED: .cfi_startproc +; CHECK-FLAGGED:# %bb.0: # %entry +; CHECK-FLAGGED: movl $10, -4(%rsp) +; CHECK-FLAGGED: retq +; CHECK-FLAGGED:.Lfunc_end0: +; CHECK-FLAGGED: .size _Z4buzzv, .Lfunc_end0-_Z4buzzv +; CHECK-FLAGGED: .cfi_endproc +; CHECK-FLAGGED: # -- End function +; CHECK-FLAGGED: .section ".note.GNU-stack","",@progbits + + +; CHECK-FULL: .globl _Z4buzzv # -- Begin function _Z4buzzv +; CHECK-FULL: .p2align 4, 0x90 +; CHECK-FULL: .type _Z4buzzv,@function +; CHECK-FULL:_Z4buzzv: # @_Z4buzzv +; CHECK-FULL:.L_Z4buzzv$local: +; CHECK-FULL: .cfi_startproc +; CHECK-FULL:# %bb.0: # %entry +; CHECK-FULL: lfence +; CHECK-FULL: movl $10, -4(%rsp) +; CHECK-FULL: retq +; CHECK-FULL:.Lfunc_end0: +; CHECK-FULL: .size _Z4buzzv, .Lfunc_end0-_Z4buzzv +; CHECK-FULL: .cfi_endproc +; CHECK-FULL: # -- End function +; CHECK-FULL: .section ".note.GNU-stack","",@progbits diff --git a/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-only-first-lfence.ll b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-only-first-lfence.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-only-first-lfence.ll @@ -0,0 +1,150 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-only-first-lfence %s -o - | FileCheck %s + +define dso_local void @_Z4buzzv() { +entry: + %a = alloca i32, align 4 + store i32 10, i32* %a, align 4 + ret void +} + +define dso_local i32 @_Z3barPi(i32* %p) { +entry: + %retval = alloca i32, align 4 + %p.addr = alloca i32*, align 8 + %a = alloca [4 x i32], 
align 16 + %len = alloca i32, align 4 + store i32* %p, i32** %p.addr, align 8 + %0 = bitcast [4 x i32]* %a to i8* + store i32 4, i32* %len, align 4 + %1 = load i32*, i32** %p.addr, align 8 + %2 = load i32, i32* %1, align 4 + %3 = load i32, i32* %len, align 4 + %cmp = icmp slt i32 %2, %3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %4 = load i32*, i32** %p.addr, align 8 + %5 = load i32, i32* %4, align 4 + %idxprom = sext i32 %5 to i64 + %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 %idxprom + %6 = load i32, i32* %arrayidx, align 4 + store i32 %6, i32* %retval, align 4 + br label %return + +if.else: ; preds = %entry + store i32 -1, i32* %retval, align 4 + br label %return + +return: ; preds = %if.else, %if.then + %7 = load i32, i32* %retval, align 4 + ret i32 %7 +} + +define dso_local i32 (i32*)* @_Z3bazv() { +entry: + %p = alloca i32 (i32*)*, align 8 + store i32 (i32*)* @_Z3barPi, i32 (i32*)** %p, align 8 + call void asm sideeffect "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32 (i32*)** %p, i32 (i32*)** %p) #3, !srcloc !2 + %0 = load i32 (i32*)*, i32 (i32*)** %p, align 8 + ret i32 (i32*)* %0 +} + +define dso_local void @_Z3fooPi(i32* %p) { +entry: + %p.addr = alloca i32*, align 8 + %t = alloca i32 (i32*)*, align 8 + store i32* %p, i32** %p.addr, align 8 + %call = call i32 (i32*)* @_Z3bazv() + store i32 (i32*)* %call, i32 (i32*)** %t, align 8 + %0 = load i32 (i32*)*, i32 (i32*)** %t, align 8 + %1 = load i32*, i32** %p.addr, align 8 + %call1 = call i32 %0(i32* %1) + ret void +} + +!2 = !{i32 233} + + +; CHECK: .globl _Z4buzzv # -- Begin function _Z4buzzv +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z4buzzv,@function +; CHECK:_Z4buzzv: # @_Z4buzzv +; CHECK:.L_Z4buzzv$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movl $10, -4(%rsp) +; CHECK: retq +; CHECK:.Lfunc_end0: +; CHECK: .size _Z4buzzv, .Lfunc_end0-_Z4buzzv +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3barPi # -- Begin function _Z3barPi +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3barPi,@function +; CHECK:_Z3barPi: # @_Z3barPi +; CHECK:.L_Z3barPi$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movq %rdi, -40(%rsp) +; CHECK: movl $4, -28(%rsp) +; CHECK: cmpl $3, (%rdi) +; CHECK: jg .LBB1_2 +; CHECK:# %bb.1: # %if.then +; CHECK: lfence +; CHECK: movq -40(%rsp), %rax +; CHECK: movslq (%rax), %rax +; CHECK: movl -24(%rsp,%rax,4), %eax +; CHECK: movl %eax, -44(%rsp) +; CHECK: movl -44(%rsp), %eax +; CHECK: retq +; CHECK:.LBB1_2: # %if.else +; CHECK: lfence +; CHECK: movl $-1, -44(%rsp) +; CHECK: movl -44(%rsp), %eax +; CHECK: retq +; CHECK:.Lfunc_end1: +; CHECK: .size _Z3barPi, .Lfunc_end1-_Z3barPi +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3bazv # -- Begin function _Z3bazv +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3bazv,@function +; CHECK:_Z3bazv: # @_Z3bazv +; CHECK:.L_Z3bazv$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movq $.L_Z3barPi$local, -8(%rsp) +; CHECK: #APP +; CHECK: #NO_APP +; CHECK: movq -8(%rsp), %rax +; CHECK: retq +; CHECK:.Lfunc_end2: +; CHECK: .size _Z3bazv, .Lfunc_end2-_Z3bazv +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3fooPi # -- Begin function _Z3fooPi +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3fooPi,@function +; CHECK:_Z3fooPi: # @_Z3fooPi +; CHECK:.L_Z3fooPi$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: subq $24, %rsp +; CHECK: 
.cfi_def_cfa_offset 32 +; CHECK: lfence +; CHECK: movq %rdi, 8(%rsp) +; CHECK: callq .L_Z3bazv$local +; CHECK: movq %rax, 16(%rsp) +; CHECK: movq 8(%rsp), %rdi +; CHECK: callq *%rax +; CHECK: addq $24, %rsp +; CHECK: .cfi_def_cfa_offset 8 +; CHECK: retq +; CHECK:.Lfunc_end3: +; CHECK: .size _Z3fooPi, .Lfunc_end3-_Z3fooPi +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .section ".note.GNU-stack","",@progbits diff --git a/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-only-lfence-non-const.ll b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-only-lfence-non-const.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-only-lfence-non-const.ll @@ -0,0 +1,161 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-only-lfence-non-const %s -o - | FileCheck %s + +define dso_local void @_Z4buzzv() { +entry: + %a = alloca i32, align 4 + store i32 10, i32* %a, align 4 + ret void +} + +define dso_local i32 @_Z3barPi(i32* %p) { +entry: + %retval = alloca i32, align 4 + %p.addr = alloca i32*, align 8 + %a = alloca [4 x i32], align 16 + %len = alloca i32, align 4 + store i32* %p, i32** %p.addr, align 8 + %0 = bitcast [4 x i32]* %a to i8* + store i32 4, i32* %len, align 4 + %1 = load i32*, i32** %p.addr, align 8 + %2 = load i32, i32* %1, align 4 + %3 = load i32, i32* %len, align 4 + %cmp = icmp slt i32 %2, %3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %4 = load i32*, i32** %p.addr, align 8 + %5 = load i32, i32* %4, align 4 + %idxprom = sext i32 %5 to i64 + %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 %idxprom + %6 = load i32, i32* %arrayidx, align 4 + store i32 %6, i32* %retval, align 4 + br label %return + +if.else: ; preds = %entry + store i32 -1, i32* %retval, align 4 + br label %return + +return: ; preds = %if.else, %if.then + %7 = load i32, i32* %retval, align 4 + ret i32 %7 +} + +define dso_local i32 (i32*)* @_Z3bazv() { +entry: + %p = alloca i32 (i32*)*, align 8 + store i32 (i32*)* @_Z3barPi, i32 (i32*)** %p, align 8 + call void asm sideeffect "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32 (i32*)** %p, i32 (i32*)** %p) #3, !srcloc !2 + %0 = load i32 (i32*)*, i32 (i32*)** %p, align 8 + ret i32 (i32*)* %0 +} + +define dso_local void @_Z3fooPi(i32* %p) { +entry: + %p.addr = alloca i32*, align 8 + %t = alloca i32 (i32*)*, align 8 + store i32* %p, i32** %p.addr, align 8 + %call = call i32 (i32*)* @_Z3bazv() + store i32 (i32*)* %call, i32 (i32*)** %t, align 8 + %0 = load i32 (i32*)*, i32 (i32*)** %t, align 8 + %1 = load i32*, i32** %p.addr, align 8 + %call1 = call i32 %0(i32* %1) + ret void +} + +!2 = !{i32 233} + +; CHECK: .globl _Z4buzzv # -- Begin function _Z4buzzv +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z4buzzv,@function +; CHECK:_Z4buzzv: # @_Z4buzzv +; CHECK:.L_Z4buzzv$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movl $10, -4(%rsp) +; CHECK: retq +; CHECK:.Lfunc_end0: +; CHECK: .size _Z4buzzv, .Lfunc_end0-_Z4buzzv +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3barPi # -- Begin function _Z3barPi +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3barPi,@function +; CHECK:_Z3barPi: # @_Z3barPi +; CHECK:.L_Z3barPi$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movq %rdi, -40(%rsp) +; CHECK: lfence +; CHECK: movl $4, -28(%rsp) +; CHECK: lfence +; CHECK: cmpl $3, (%rdi) +; CHECK: lfence +; CHECK: jg .LBB1_2 +; 
CHECK:# %bb.1: # %if.then +; CHECK: lfence +; CHECK: movq -40(%rsp), %rax +; CHECK: lfence +; CHECK: movslq (%rax), %rax +; CHECK: lfence +; CHECK: movl -24(%rsp,%rax,4), %eax +; CHECK: lfence +; CHECK: movl %eax, -44(%rsp) +; CHECK: lfence +; CHECK: movl -44(%rsp), %eax +; CHECK: retq +; CHECK:.LBB1_2: # %if.else +; CHECK: lfence +; CHECK: movl $-1, -44(%rsp) +; CHECK: lfence +; CHECK: movl -44(%rsp), %eax +; CHECK: retq +; CHECK:.Lfunc_end1: +; CHECK: .size _Z3barPi, .Lfunc_end1-_Z3barPi +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3bazv # -- Begin function _Z3bazv +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3bazv,@function +; CHECK:_Z3bazv: # @_Z3bazv +; CHECK:.L_Z3bazv$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movq $.L_Z3barPi$local, -8(%rsp) +; CHECK: lfence +; CHECK: #APP +; CHECK: #NO_APP +; CHECK: lfence +; CHECK: movq -8(%rsp), %rax +; CHECK: retq +; CHECK:.Lfunc_end2: +; CHECK: .size _Z3bazv, .Lfunc_end2-_Z3bazv +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3fooPi # -- Begin function _Z3fooPi +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3fooPi,@function +; CHECK:_Z3fooPi: # @_Z3fooPi +; CHECK:.L_Z3fooPi$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: subq $24, %rsp +; CHECK: .cfi_def_cfa_offset 32 +; CHECK: lfence +; CHECK: movq %rdi, 8(%rsp) +; CHECK: callq .L_Z3bazv$local +; CHECK: lfence +; CHECK: movq %rax, 16(%rsp) +; CHECK: lfence +; CHECK: movq 8(%rsp), %rdi +; CHECK: callq *%rax +; CHECK: addq $24, %rsp +; CHECK: .cfi_def_cfa_offset 8 +; CHECK: retq +; CHECK:.Lfunc_end3: +; CHECK: .size _Z3fooPi, .Lfunc_end3-_Z3fooPi +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .section ".note.GNU-stack","",@progbits diff --git a/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression.ll b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression.ll @@ -0,0 +1,161 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable %s -o - | FileCheck %s + +define dso_local void @_Z4buzzv() { +entry: + %a = alloca i32, align 4 + store i32 10, i32* %a, align 4 + ret void +} + +define dso_local i32 @_Z3barPi(i32* %p) { +entry: + %retval = alloca i32, align 4 + %p.addr = alloca i32*, align 8 + %a = alloca [4 x i32], align 16 + %len = alloca i32, align 4 + store i32* %p, i32** %p.addr, align 8 + %0 = bitcast [4 x i32]* %a to i8* + store i32 4, i32* %len, align 4 + %1 = load i32*, i32** %p.addr, align 8 + %2 = load i32, i32* %1, align 4 + %3 = load i32, i32* %len, align 4 + %cmp = icmp slt i32 %2, %3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %4 = load i32*, i32** %p.addr, align 8 + %5 = load i32, i32* %4, align 4 + %idxprom = sext i32 %5 to i64 + %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 %idxprom + %6 = load i32, i32* %arrayidx, align 4 + store i32 %6, i32* %retval, align 4 + br label %return + +if.else: ; preds = %entry + store i32 -1, i32* %retval, align 4 + br label %return + +return: ; preds = %if.else, %if.then + %7 = load i32, i32* %retval, align 4 + ret i32 %7 +} + +define dso_local i32 (i32*)* @_Z3bazv() { +entry: + %p = alloca i32 (i32*)*, align 8 + store i32 (i32*)* @_Z3barPi, i32 (i32*)** %p, align 8 + call void asm sideeffect "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32 (i32*)** %p, i32 (i32*)** %p) #3, !srcloc !2 + %0 = load i32 (i32*)*, i32 (i32*)** %p, align 
8 + ret i32 (i32*)* %0 +} + +define dso_local void @_Z3fooPi(i32* %p) { +entry: + %p.addr = alloca i32*, align 8 + %t = alloca i32 (i32*)*, align 8 + store i32* %p, i32** %p.addr, align 8 + %call = call i32 (i32*)* @_Z3bazv() + store i32 (i32*)* %call, i32 (i32*)** %t, align 8 + %0 = load i32 (i32*)*, i32 (i32*)** %t, align 8 + %1 = load i32*, i32** %p.addr, align 8 + %call1 = call i32 %0(i32* %1) + ret void +} + +!2 = !{i32 233} + +; CHECK: .globl _Z4buzzv # -- Begin function _Z4buzzv +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z4buzzv,@function +; CHECK:_Z4buzzv: # @_Z4buzzv +; CHECK:.L_Z4buzzv$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movl $10, -4(%rsp) +; CHECK: retq +; CHECK:.Lfunc_end0: +; CHECK: .size _Z4buzzv, .Lfunc_end0-_Z4buzzv +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3barPi # -- Begin function _Z3barPi +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3barPi,@function +; CHECK:_Z3barPi: # @_Z3barPi +; CHECK:.L_Z3barPi$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movq %rdi, -40(%rsp) +; CHECK: lfence +; CHECK: movl $4, -28(%rsp) +; CHECK: lfence +; CHECK: cmpl $3, (%rdi) +; CHECK: lfence +; CHECK: jg .LBB1_2 +; CHECK:# %bb.1: # %if.then +; CHECK: lfence +; CHECK: movq -40(%rsp), %rax +; CHECK: lfence +; CHECK: movslq (%rax), %rax +; CHECK: lfence +; CHECK: movl -24(%rsp,%rax,4), %eax +; CHECK: lfence +; CHECK: movl %eax, -44(%rsp) +; CHECK: lfence +; CHECK: movl -44(%rsp), %eax +; CHECK: retq +; CHECK:.LBB1_2: # %if.else +; CHECK: lfence +; CHECK: movl $-1, -44(%rsp) +; CHECK: lfence +; CHECK: movl -44(%rsp), %eax +; CHECK: retq +; CHECK:.Lfunc_end1: +; CHECK: .size _Z3barPi, .Lfunc_end1-_Z3barPi +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3bazv # -- Begin function _Z3bazv +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3bazv,@function +; CHECK:_Z3bazv: # @_Z3bazv +; CHECK:.L_Z3bazv$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: lfence +; CHECK: movq $.L_Z3barPi$local, -8(%rsp) +; CHECK: lfence +; CHECK: #APP +; CHECK: #NO_APP +; CHECK: lfence +; CHECK: movq -8(%rsp), %rax +; CHECK: retq +; CHECK:.Lfunc_end2: +; CHECK: .size _Z3bazv, .Lfunc_end2-_Z3bazv +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .globl _Z3fooPi # -- Begin function _Z3fooPi +; CHECK: .p2align 4, 0x90 +; CHECK: .type _Z3fooPi,@function +; CHECK:_Z3fooPi: # @_Z3fooPi +; CHECK:.L_Z3fooPi$local: +; CHECK: .cfi_startproc +; CHECK:# %bb.0: # %entry +; CHECK: subq $24, %rsp +; CHECK: .cfi_def_cfa_offset 32 +; CHECK: lfence +; CHECK: movq %rdi, 8(%rsp) +; CHECK: callq .L_Z3bazv$local +; CHECK: lfence +; CHECK: movq %rax, 16(%rsp) +; CHECK: lfence +; CHECK: movq 8(%rsp), %rdi +; CHECK: callq *%rax +; CHECK: addq $24, %rsp +; CHECK: .cfi_def_cfa_offset 8 +; CHECK: retq +; CHECK:.Lfunc_end3: +; CHECK: .size _Z3fooPi, .Lfunc_end3-_Z3fooPi +; CHECK: .cfi_endproc +; CHECK: # -- End function +; CHECK: .section ".note.GNU-stack","",@progbits