diff --git a/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
--- a/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
@@ -55,6 +55,11 @@
     cl::desc("Omit LFENCE in basic blocks without any loads even if there are stores."),
     cl::init(false), cl::Hidden);
 
+static cl::opt<bool> OmitLFENCEInBasicBlocksWithOneLoadAndNoStores(
+    "x86-seses-omit-lfence-in-bb-with-one-load-no-stores",
+    cl::desc("Don't LFENCE in basic blocks with one load and no stores."),
+    cl::init(false), cl::Hidden);
+
 static bool hasConstantAddressingMode(const MachineInstr &MI);
 
 namespace {
@@ -86,16 +91,30 @@
   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
   const X86InstrInfo *TII = Subtarget.getInstrInfo();
   for (MachineBasicBlock &MBB : MF) {
-    if (OmitLFENCEInBasicBlocksWithoutLoads) {
+    // Let's do a pass over the basic block to see if we can skip LFENCEing it
+    // based on the number of loads and stores.
+    if (OmitLFENCEInBasicBlocksWithoutLoads ||
+        OmitLFENCEInBasicBlocksWithOneLoadAndNoStores) {
+      int LoadCount = 0;
+      int StoreCount = 0;
 
-      bool FoundLoad = false;
       for (const MachineInstr &MI : MBB) {
         if (MI.mayLoad()) {
-          FoundLoad = true;
-          break;
+          ++LoadCount;
+        }
+        if (MI.mayStore()) {
+          ++StoreCount;
         }
       }
-      if (!FoundLoad) {
+
+      // No loads at all: skip the block when the without-loads option is set.
+      if (OmitLFENCEInBasicBlocksWithoutLoads && LoadCount == 0) {
+        continue;
+      }
+
+      // At most one load and no stores: skip the block when that option is set.
+      if (OmitLFENCEInBasicBlocksWithOneLoadAndNoStores && StoreCount == 0 &&
+          LoadCount <= 1) {
         continue;
       }
     }
diff --git a/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll
@@ -0,0 +1,77 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-omit-lfence-in-bb-with-one-load-no-stores %s -o - | FileCheck %s --check-prefix=CHECK-FLAGGED
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable %s -o - | FileCheck %s --check-prefix=CHECK-FULL
+
+define dso_local void @_Z3fooPi(i32* %p) #0 {
+entry:
+  %p.addr = alloca i32*, align 8
+  %a = alloca i32, align 4
+  store i32* %p, i32** %p.addr, align 8
+  %0 = load i32*, i32** %p.addr, align 8
+  %1 = load i32, i32* %0, align 4
+  %cmp = icmp eq i32 %1, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+  %2 = load i32*, i32** %p.addr, align 8
+  br label %if.end
+
+if.end: ; preds = %if.then, %entry
+  ret void
+}
+
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; CHECK-FLAGGED: .globl _Z3fooPi # -- Begin function _Z3fooPi
+; CHECK-FLAGGED-NEXT: .p2align 4, 0x90
+; CHECK-FLAGGED-NEXT: .type _Z3fooPi,@function
+; CHECK-FLAGGED-NEXT:_Z3fooPi: # @_Z3fooPi
+; CHECK-FLAGGED-NEXT:.L_Z3fooPi$local:
+; CHECK-FLAGGED-NEXT: .cfi_startproc
+; CHECK-FLAGGED-NEXT:# %bb.0: # %entry
+; CHECK-FLAGGED-NEXT: lfence
+; CHECK-FLAGGED-NEXT: pushq %rbp
+; CHECK-FLAGGED-NEXT: .cfi_def_cfa_offset 16
+; CHECK-FLAGGED-NEXT: .cfi_offset %rbp, -16
+; CHECK-FLAGGED-NEXT: movq %rsp, %rbp
+; CHECK-FLAGGED-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-FLAGGED-NEXT: lfence
+; CHECK-FLAGGED-NEXT: movq %rdi, -8(%rbp)
+; CHECK-FLAGGED-NEXT: lfence
+; CHECK-FLAGGED-NEXT: movq -8(%rbp), %rax
+; CHECK-FLAGGED-NEXT: lfence
+; CHECK-FLAGGED-NEXT: cmpl $0, (%rax)
+; CHECK-FLAGGED-NEXT: lfence
+; CHECK-FLAGGED-NEXT: jne .LBB0_2
+; CHECK-FLAGGED-NEXT:# %bb.1: # %if.then
+; CHECK-FLAGGED-NEXT:.LBB0_2: # %if.end
+; CHECK-FLAGGED-NEXT: popq %rbp
+; CHECK-FLAGGED-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-FLAGGED-NEXT: retq
+
+; CHECK-FULL: .globl _Z3fooPi # -- Begin function _Z3fooPi
+; CHECK-FULL-NEXT: .p2align 4, 0x90
+; CHECK-FULL-NEXT: .type _Z3fooPi,@function
+; CHECK-FULL-NEXT:_Z3fooPi: # @_Z3fooPi
+; CHECK-FULL-NEXT:.L_Z3fooPi$local:
+; CHECK-FULL-NEXT: .cfi_startproc
+; CHECK-FULL-NEXT:# %bb.0: # %entry
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: pushq %rbp
+; CHECK-FULL-NEXT: .cfi_def_cfa_offset 16
+; CHECK-FULL-NEXT: .cfi_offset %rbp, -16
+; CHECK-FULL-NEXT: movq %rsp, %rbp
+; CHECK-FULL-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: movq %rdi, -8(%rbp)
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: movq -8(%rbp), %rax
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: cmpl $0, (%rax)
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: jne .LBB0_2
+; CHECK-FULL-NEXT:# %bb.1: # %if.then
+; CHECK-FULL-NEXT:.LBB0_2: # %if.end
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: popq %rbp
+; CHECK-FULL-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-FULL-NEXT: retq