diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -64,6 +64,7 @@
   X86SelectionDAGInfo.cpp
   X86ShuffleDecodeConstantPool.cpp
   X86SpeculativeLoadHardening.cpp
+  X86SpeculativeExecutionSideEffectSuppression.cpp
   X86Subtarget.cpp
   X86TargetMachine.cpp
   X86TargetObjectFile.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -142,6 +142,7 @@
 FunctionPass *createX86LoadValueInjectionRetHardeningPass();
 FunctionPass *createX86SpeculativeLoadHardeningPass();
+FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
 
 void initializeEvexToVexInstPassPass(PassRegistry &);
 void initializeFixupBWInstPassPass(PassRegistry &);
@@ -162,6 +163,7 @@
 void initializeX86OptimizeLEAPassPass(PassRegistry &);
 void initializeX86PartialReductionPass(PassRegistry &);
 void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
+void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &);
 
 namespace X86AS {
 enum : unsigned {
diff --git a/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
new file
--- /dev/null
+++ b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
@@ -0,0 +1,157 @@
+//===-- X86SpeculativeExecutionSideEffectSuppression.cpp ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file contains the X86 implementation of the speculative execution side
+/// effect suppression mitigation.
+///
+/// This must be used with the -mlvi-cfi flag in order to mitigate indirect
+/// branches and returns.
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-seses"
+
+STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
+
+static cl::opt<bool> EnableSpeculativeExecutionSideEffectSuppression(
+    "x86-seses-enable",
+    cl::desc("Force enable speculative execution side effect suppression. "
" + "(Note: User must pass -mlvi-cfi in order to mitigate indirect " + "branches and returns.)"), + cl::init(false), cl::Hidden); + +static cl::opt OneLFENCEPerBasicBlock( + "x86-seses-one-lfence-per-bb", + cl::desc( + "Omit all lfences other than the first to be placed in a basic block."), + cl::init(false), cl::Hidden); + +static cl::opt OnlyLFENCENonConst( + "x86-seses-only-lfence-non-const", + cl::desc("Only lfence before groups of terminators where at least one " + "branch instruction has an input to the addressing mode that is a " + "register other than %rip."), + cl::init(false), cl::Hidden); + +static cl::opt + OmitBranchLFENCEs("x86-seses-omit-branch-lfences", + cl::desc("Omit all lfences before branch instructions."), + cl::init(false), cl::Hidden); + +namespace { + +class X86SpeculativeExecutionSideEffectSuppression + : public MachineFunctionPass { +public: + X86SpeculativeExecutionSideEffectSuppression() : MachineFunctionPass(ID) {} + + static char ID; + StringRef getPassName() const override { + return "X86 Speculative Execution Side Effect Suppression"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // namespace + +char X86SpeculativeExecutionSideEffectSuppression::ID = 0; + +// This function returns whether the passed instruction uses a memory addressing +// mode that is constant. We treat all memory addressing modes that read +// from a register that is not %rip as non-constant. Note that the use +// of the EFLAGS register results in an addressing mode being considered +// non-constant, therefore all JCC instructions will return false from this +// function since one of their operands will always be the EFLAGS register. +static bool hasConstantAddressingMode(const MachineInstr &MI) { + for (const MachineOperand &MO : MI.uses()) + if (MO.isReg() && X86::RIP != MO.getReg()) + return false; + return true; +} + +bool X86SpeculativeExecutionSideEffectSuppression::runOnMachineFunction( + MachineFunction &MF) { + if (!EnableSpeculativeExecutionSideEffectSuppression) + return false; + + LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName() + << " **********\n"); + bool Modified = false; + const X86Subtarget &Subtarget = MF.getSubtarget(); + const X86InstrInfo *TII = Subtarget.getInstrInfo(); + for (MachineBasicBlock &MBB : MF) { + MachineInstr *FirstTerminator = nullptr; + + for (auto &MI : MBB) { + // We want to put an LFENCE before any instruction that + // may load or store. This LFENCE is intended to avoid leaking any secret + // data due to a given load or store. This results in closing the cache + // and memory timing side channels. We will treat terminators that load + // or store separately. + if (MI.mayLoadOrStore() && !MI.isTerminator()) { + BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE)); + NumLFENCEsInserted++; + Modified = true; + if (OneLFENCEPerBasicBlock) + break; + } + // The following section will be LFENCEing before groups of terminators + // that include branches. This will close the branch prediction side + // channels since we will prevent code executing after misspeculation as + // a result of the LFENCEs placed with this logic. + + // Keep track of the first terminator in a basic block since if we need + // to LFENCE the terminators in this basic block we must add the + // instruction before the first terminator in the basic block (as + // opposed to before the terminator that indicates an LFENCE is + // required). 
+      // An example of why this is necessary is that the
+      // X86InstrInfo::analyzeBranch method assumes all terminators are grouped
+      // together and terminates its analysis once the first non-terminator
+      // instruction is found.
+      if (MI.isTerminator() && FirstTerminator == nullptr)
+        FirstTerminator = &MI;
+
+      // Look for branch instructions that will require an LFENCE to be put
+      // before this basic block's terminators.
+      if (!MI.isBranch() || OmitBranchLFENCEs)
+        // This isn't a branch or we're not putting LFENCEs before branches.
+        continue;
+
+      if (OnlyLFENCENonConst && hasConstantAddressingMode(MI))
+        // This is a branch, but it only has constant addressing mode and we're
+        // not adding LFENCEs before such branches.
+        continue;
+
+      // This branch requires adding an LFENCE.
+      BuildMI(MBB, FirstTerminator, DebugLoc(), TII->get(X86::LFENCE));
+      NumLFENCEsInserted++;
+      Modified = true;
+      break;
+    }
+  }
+
+  return Modified;
+}
+
+FunctionPass *llvm::createX86SpeculativeExecutionSideEffectSuppression() {
+  return new X86SpeculativeExecutionSideEffectSuppression();
+}
+
+INITIALIZE_PASS(X86SpeculativeExecutionSideEffectSuppression, "x86-seses",
+                "X86 Speculative Execution Side Effect Suppression", false,
+                false)
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -82,6 +82,7 @@
   initializeX86AvoidSFBPassPass(PR);
   initializeX86AvoidTrailingCallPassPass(PR);
   initializeX86SpeculativeLoadHardeningPassPass(PR);
+  initializeX86SpeculativeExecutionSideEffectSuppressionPass(PR);
   initializeX86FlagsCopyLoweringPassPass(PR);
   initializeX86CondBrFoldingPassPass(PR);
   initializeX86LoadValueInjectionRetHardeningPassPass(PR);
@@ -525,6 +526,16 @@
   const Triple &TT = TM->getTargetTriple();
   const MCAsmInfo *MAI = TM->getMCAsmInfo();
 
+  // The X86 Speculative Execution Side Effect Suppression pass must run after
+  // all control-flow-graph-modifying passes; as a result it is added here,
+  // right before the X86 Indirect Thunks pass. The reason it must run after
+  // control flow graph modifications is that the model of LFENCE in LLVM has
+  // to be updated (FIXME: https://bugs.llvm.org/show_bug.cgi?id=45167). The
+  // placement of this pass was hand checked to ensure that subsequent passes
+  // don't move code around the LFENCEs in a way that would hurt the
+  // correctness of this pass, and the placement has been verified by hand
+  // inspection of the codegen output.
+  addPass(createX86SpeculativeExecutionSideEffectSuppression());
   addPass(createX86IndirectThunksPass());
 
   // Insert extra int3 instructions after trailing call instructions to avoid
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -73,6 +73,7 @@
 ; CHECK-NEXT: Contiguously Lay Out Funclets
 ; CHECK-NEXT: StackMap Liveness Analysis
 ; CHECK-NEXT: Live DEBUG_VALUE analysis
+; CHECK-NEXT: X86 Speculative Execution Side Effect Suppression
 ; CHECK-NEXT: X86 Indirect Thunks
 ; CHECK-NEXT: Check CFA info and insert CFI instructions if needed
 ; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening
diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll
--- a/llvm/test/CodeGen/X86/O3-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -186,6 +186,7 @@
 ; CHECK-NEXT: Contiguously Lay Out Funclets
 ; CHECK-NEXT: StackMap Liveness Analysis
 ; CHECK-NEXT: Live DEBUG_VALUE analysis
+; CHECK-NEXT: X86 Speculative Execution Side Effect Suppression
 ; CHECK-NEXT: X86 Indirect Thunks
 ; CHECK-NEXT: Check CFA info and insert CFI instructions if needed
 ; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening
diff --git a/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression.ll b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression.ll
@@ -0,0 +1,300 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-one-lfence-per-bb %s -o - | FileCheck %s --check-prefix=X86-ONE-LFENCE
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-omit-branch-lfences %s -o - | FileCheck %s --check-prefix=X86-OMIT-BR
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-only-lfence-non-const %s -o - | FileCheck %s --check-prefix=X86-NON-CONST
+
+define void @_Z4buzzv() {
+; CHECK-LABEL: _Z4buzzv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl $10, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: retq
+;
+; X86-ONE-LFENCE-LABEL: _Z4buzzv:
+; X86-ONE-LFENCE: # %bb.0: # %entry
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movl $10, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: retq
+;
+; X86-OMIT-BR-LABEL: _Z4buzzv:
+; X86-OMIT-BR: # %bb.0: # %entry
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl $10, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: retq
+;
+; X86-NON-CONST-LABEL: _Z4buzzv:
+; X86-NON-CONST: # %bb.0: # %entry
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl $10, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: retq
+entry:
+  %a = alloca i32, align 4
+  store i32 10, i32* %a, align 4
+  ret void
+}
+
+define i32 @_Z3barPi(i32* %p) {
+; CHECK-LABEL: _Z3barPi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl $4, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: cmpl $3, (%rdi)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: jg .LBB1_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movslq (%rax), %rax
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl -24(%rsp,%rax,4), %eax
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB1_2: # %if.else
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl $-1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: retq
+;
+; X86-ONE-LFENCE-LABEL: _Z3barPi:
+; X86-ONE-LFENCE: # %bb.0: # %entry
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: movl $4, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: cmpl $3, (%rdi)
+; X86-ONE-LFENCE-NEXT: jg .LBB1_2
+; X86-ONE-LFENCE-NEXT: # %bb.1: # %if.then
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-ONE-LFENCE-NEXT: movslq (%rax), %rax
+; X86-ONE-LFENCE-NEXT: movl -24(%rsp,%rax,4), %eax
+; X86-ONE-LFENCE-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-ONE-LFENCE-NEXT: retq
+; X86-ONE-LFENCE-NEXT: .LBB1_2: # %if.else
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movl $-1, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-ONE-LFENCE-NEXT: retq
+;
+; X86-OMIT-BR-LABEL: _Z3barPi:
+; X86-OMIT-BR: # %bb.0: # %entry
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl $4, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: cmpl $3, (%rdi)
+; X86-OMIT-BR-NEXT: jg .LBB1_2
+; X86-OMIT-BR-NEXT: # %bb.1: # %if.then
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movslq (%rax), %rax
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl -24(%rsp,%rax,4), %eax
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-OMIT-BR-NEXT: retq
+; X86-OMIT-BR-NEXT: .LBB1_2: # %if.else
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl $-1, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-OMIT-BR-NEXT: retq
+;
+; X86-NON-CONST-LABEL: _Z3barPi:
+; X86-NON-CONST: # %bb.0: # %entry
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl $4, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: cmpl $3, (%rdi)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: jg .LBB1_2
+; X86-NON-CONST-NEXT: # %bb.1: # %if.then
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movslq (%rax), %rax
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl -24(%rsp,%rax,4), %eax
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-NON-CONST-NEXT: retq
+; X86-NON-CONST-NEXT: .LBB1_2: # %if.else
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl $-1, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; X86-NON-CONST-NEXT: retq
+entry:
+  %retval = alloca i32, align 4
+  %p.addr = alloca i32*, align 8
+  %a = alloca [4 x i32], align 16
+  %len = alloca i32, align 4
+  store i32* %p, i32** %p.addr, align 8
+  %0 = bitcast [4 x i32]* %a to i8*
+  store i32 4, i32* %len, align 4
+  %1 = load i32*, i32** %p.addr, align 8
+  %2 = load i32, i32* %1, align 4
+  %3 = load i32, i32* %len, align 4
+  %cmp = icmp slt i32 %2, %3
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+  %4 = load i32*, i32** %p.addr, align 8
+  %5 = load i32, i32* %4, align 4
+  %idxprom = sext i32 %5 to i64
+  %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 %idxprom
+  %6 = load i32, i32* %arrayidx, align 4
+  store i32 %6, i32* %retval, align 4
+  br label %return
+
+if.else: ; preds = %entry
+  store i32 -1, i32* %retval, align 4
+  br label %return
+
+return: ; preds = %if.else, %if.then
+  %7 = load i32, i32* %retval, align 4
+  ret i32 %7
+}
+
+define i32 (i32*)* @_Z3bazv() {
+; CHECK-LABEL: _Z3bazv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq $_Z3barPi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: retq
+;
+; X86-ONE-LFENCE-LABEL: _Z3bazv:
+; X86-ONE-LFENCE: # %bb.0: # %entry
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movq $_Z3barPi, -{{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: #APP
+; X86-ONE-LFENCE-NEXT: #NO_APP
+; X86-ONE-LFENCE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-ONE-LFENCE-NEXT: retq
+;
+; X86-OMIT-BR-LABEL: _Z3bazv:
+; X86-OMIT-BR: # %bb.0: # %entry
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq $_Z3barPi, -{{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: #APP
+; X86-OMIT-BR-NEXT: #NO_APP
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-OMIT-BR-NEXT: retq
+;
+; X86-NON-CONST-LABEL: _Z3bazv:
+; X86-NON-CONST: # %bb.0: # %entry
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq $_Z3barPi, -{{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: #APP
+; X86-NON-CONST-NEXT: #NO_APP
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X86-NON-CONST-NEXT: retq
+entry:
+  %p = alloca i32 (i32*)*, align 8
+  store i32 (i32*)* @_Z3barPi, i32 (i32*)** %p, align 8
+  call void asm sideeffect "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32 (i32*)** %p, i32 (i32*)** %p) #3, !srcloc !2
+  %0 = load i32 (i32*)*, i32 (i32*)** %p, align 8
+  ret i32 (i32*)* %0
+}
+
+define void @_Z3fooPi(i32* %p) {
+; CHECK-LABEL: _Z3fooPi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: callq _Z3bazv
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: lfence
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: callq *%rax
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; X86-ONE-LFENCE-LABEL: _Z3fooPi:
+; X86-ONE-LFENCE: # %bb.0: # %entry
+; X86-ONE-LFENCE-NEXT: subq $24, %rsp
+; X86-ONE-LFENCE-NEXT: .cfi_def_cfa_offset 32
+; X86-ONE-LFENCE-NEXT: lfence
+; X86-ONE-LFENCE-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: callq _Z3bazv
+; X86-ONE-LFENCE-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; X86-ONE-LFENCE-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; X86-ONE-LFENCE-NEXT: callq *%rax
+; X86-ONE-LFENCE-NEXT: addq $24, %rsp
+; X86-ONE-LFENCE-NEXT: .cfi_def_cfa_offset 8
+; X86-ONE-LFENCE-NEXT: retq
+;
+; X86-OMIT-BR-LABEL: _Z3fooPi:
+; X86-OMIT-BR: # %bb.0: # %entry
+; X86-OMIT-BR-NEXT: subq $24, %rsp
+; X86-OMIT-BR-NEXT: .cfi_def_cfa_offset 32
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: callq _Z3bazv
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; X86-OMIT-BR-NEXT: lfence
+; X86-OMIT-BR-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; X86-OMIT-BR-NEXT: callq *%rax
+; X86-OMIT-BR-NEXT: addq $24, %rsp
+; X86-OMIT-BR-NEXT: .cfi_def_cfa_offset 8
+; X86-OMIT-BR-NEXT: retq
+;
+; X86-NON-CONST-LABEL: _Z3fooPi:
+; X86-NON-CONST: # %bb.0: # %entry
+; X86-NON-CONST-NEXT: subq $24, %rsp
+; X86-NON-CONST-NEXT: .cfi_def_cfa_offset 32
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: callq _Z3bazv
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; X86-NON-CONST-NEXT: lfence
+; X86-NON-CONST-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; X86-NON-CONST-NEXT: callq *%rax
+; X86-NON-CONST-NEXT: addq $24, %rsp
+; X86-NON-CONST-NEXT: .cfi_def_cfa_offset 8
+; X86-NON-CONST-NEXT: retq
+entry:
+  %p.addr = alloca i32*, align 8
+  %t = alloca i32 (i32*)*, align 8
+  store i32* %p, i32** %p.addr, align 8
+  %call = call i32 (i32*)* @_Z3bazv()
+  store i32 (i32*)* %call, i32 (i32*)** %t, align 8
+  %0 = load i32 (i32*)*, i32 (i32*)** %t, align 8
+  %1 = load i32*, i32** %p.addr, align 8
+  %call1 = call i32 %0(i32* %1)
+  ret void
+}
+
+!2 = !{i32 233}
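
Note for reviewers (illustrative, not part of the patch): the snippet below is a minimal sketch of how the new llc flags introduced above can be exercised on a standalone IR file. The function name @store_one and the expectation about lfence placement are assumptions based on the pass logic (an LFENCE is placed before every non-terminator load/store, and -x86-seses-one-lfence-per-bb keeps at most one LFENCE per basic block); they are not autogenerated checks.

; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable %s -o -
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-one-lfence-per-bb %s -o -
define void @store_one(i32* %p) {
entry:
  ; With -x86-seses-enable, an lfence should be emitted before this store.
  store i32 1, i32* %p, align 4
  ret void
}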