diff --git a/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
--- a/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
@@ -55,6 +55,13 @@
     cl::desc("Omit LFENCE in basic blocks without any loads even if there are stores."),
     cl::init(false), cl::Hidden);
 
+// When set, basic blocks with no stores and at most one load are skipped
+// entirely, so no LFENCE is inserted into them.
+static cl::opt<bool> OmitLFENCEInBasicBlocksWithOneLoadAndNoStores(
+    "x86-seses-omit-lfence-in-bb-with-one-load-no-stores",
+    cl::desc("Don't LFENCE in basic blocks with one load and no stores."),
+    cl::init(false), cl::Hidden);
+
 static bool hasConstantAddressingMode(const MachineInstr &MI);
 
 namespace {
@@ -86,15 +93,29 @@
   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
   const X86InstrInfo *TII = Subtarget.getInstrInfo();
   for (MachineBasicBlock &MBB : MF) {
-    if (OmitLFENCEInBasicBlocksWithoutLoads) {
-      bool FoundLoad = false;
+    // Scan the block once, counting instructions that may load and that may
+    // store, to decide whether the block can be left without any LFENCEs
+    // under the enabled omit-LFENCE options.
+    if (OmitLFENCEInBasicBlocksWithoutLoads ||
+        OmitLFENCEInBasicBlocksWithOneLoadAndNoStores) {
+      unsigned LoadCount = 0;
+      unsigned StoreCount = 0;
+
       for (const MachineInstr &MI : MBB) {
         if (MI.mayLoad()) {
-          FoundLoad = true;
-          break;
+          ++LoadCount;
+        }
+        if (MI.mayStore()) {
+          ++StoreCount;
         }
       }
-      if (!FoundLoad) {
+
+      if (OmitLFENCEInBasicBlocksWithoutLoads && LoadCount == 0) {
+        continue;
+      }
+
+      if (OmitLFENCEInBasicBlocksWithOneLoadAndNoStores && StoreCount == 0 &&
+          LoadCount <= 1) {
         continue;
       }
     }
diff --git a/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll
@@ -0,0 +1,87 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-omit-lfence-in-bb-with-one-load-no-stores %s -o - | FileCheck %s --check-prefix=CHECK-FLAGGED
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable %s -o - | FileCheck %s --check-prefix=CHECK-FULL
+
+; The two runs differ only in %if.end: with the flag, the LFENCE otherwise
+; emitted before popq must be absent.
+
+define dso_local void @_Z3fooPi(i32* %p) #0 {
+entry:
+  %p.addr = alloca i32*, align 8
+  %a = alloca i32, align 4
+  store i32* %p, i32** %p.addr, align 8
+  %0 = load i32*, i32** %p.addr, align 8
+  %1 = load i32, i32* %0, align 4
+  %cmp = icmp eq i32 %1, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+  %2 = load i32*, i32** %p.addr, align 8
+  br label %if.end
+
+if.end: ; preds = %if.then, %entry
+  ret void
+}
+
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; CHECK-FLAGGED: .globl _Z3fooPi # -- Begin function _Z3fooPi
+; CHECK-FLAGGED: .p2align 4, 0x90
+; CHECK-FLAGGED: .type _Z3fooPi,@function
+; CHECK-FLAGGED:_Z3fooPi: # @_Z3fooPi
+; CHECK-FLAGGED:.L_Z3fooPi$local:
+; CHECK-FLAGGED: .cfi_startproc
+; CHECK-FLAGGED:# %bb.0: # %entry
+; CHECK-FLAGGED: lfence
+; CHECK-FLAGGED: pushq %rbp
+; CHECK-FLAGGED: .cfi_def_cfa_offset 16
+; CHECK-FLAGGED: .cfi_offset %rbp, -16
+; CHECK-FLAGGED: movq %rsp, %rbp
+; CHECK-FLAGGED: .cfi_def_cfa_register %rbp
+; CHECK-FLAGGED: lfence
+; CHECK-FLAGGED: movq %rdi, -8(%rbp)
+; CHECK-FLAGGED: lfence
+; CHECK-FLAGGED: movq -8(%rbp), %rax
+; CHECK-FLAGGED: lfence
+; CHECK-FLAGGED: cmpl $0, (%rax)
+; CHECK-FLAGGED: lfence
+; CHECK-FLAGGED: jne .LBB0_2
+; CHECK-FLAGGED:# %bb.1: # %if.then
+; CHECK-FLAGGED:.LBB0_2: # %if.end
+; CHECK-FLAGGED-NOT: lfence
+; CHECK-FLAGGED: popq %rbp
+; CHECK-FLAGGED: .cfi_def_cfa %rsp, 8
+; CHECK-FLAGGED: retq
+; CHECK-FLAGGED:.Lfunc_end0:
+; CHECK-FLAGGED: .size _Z3fooPi, .Lfunc_end0-_Z3fooPi
+; CHECK-FLAGGED: .cfi_endproc
+
+; CHECK-FULL: .globl _Z3fooPi # -- Begin function _Z3fooPi
+; CHECK-FULL: .p2align 4, 0x90
+; CHECK-FULL: .type _Z3fooPi,@function
+; CHECK-FULL:_Z3fooPi: # @_Z3fooPi
+; CHECK-FULL:.L_Z3fooPi$local:
+; CHECK-FULL: .cfi_startproc
+; CHECK-FULL:# %bb.0: # %entry
+; CHECK-FULL: lfence
+; CHECK-FULL: pushq %rbp
+; CHECK-FULL: .cfi_def_cfa_offset 16
+; CHECK-FULL: .cfi_offset %rbp, -16
+; CHECK-FULL: movq %rsp, %rbp
+; CHECK-FULL: .cfi_def_cfa_register %rbp
+; CHECK-FULL: lfence
+; CHECK-FULL: movq %rdi, -8(%rbp)
+; CHECK-FULL: lfence
+; CHECK-FULL: movq -8(%rbp), %rax
+; CHECK-FULL: lfence
+; CHECK-FULL: cmpl $0, (%rax)
+; CHECK-FULL: lfence
+; CHECK-FULL: jne .LBB0_2
+; CHECK-FULL:# %bb.1: # %if.then
+; CHECK-FULL:.LBB0_2: # %if.end
+; CHECK-FULL: lfence
+; CHECK-FULL: popq %rbp
+; CHECK-FULL: .cfi_def_cfa %rsp, 8
+; CHECK-FULL: retq
+; CHECK-FULL:.Lfunc_end0:
+; CHECK-FULL: .size _Z3fooPi, .Lfunc_end0-_Z3fooPi
+; CHECK-FULL: .cfi_endproc