diff --git a/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
--- a/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
@@ -55,6 +55,11 @@
                       cl::desc("Omit LFENCE in basic blocks without any loads even if there are stores."),
                       cl::init(false), cl::Hidden);
 
+static cl::opt<bool> OmitLFENCEInBasicBlocksWithOneLoadAndNoStores(
+    "x86-seses-omit-lfence-in-bb-with-one-load-no-stores",
+    cl::desc("Don't LFENCE in basic blocks with at most one load and no stores."),
+    cl::init(false), cl::Hidden);
+
 static bool hasConstantAddressingMode(const MachineInstr &MI);
 
 namespace {
@@ -86,16 +91,28 @@
   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
   const X86InstrInfo *TII = Subtarget.getInstrInfo();
   for (MachineBasicBlock &MBB : MF) {
-    if (OmitLFENCEInBasicBlocksWithoutLoads) {
+    // Let's do a pass over the basic block to see if we can skip LFENCEing it
+    // based on the number of loads and stores.
+    if (OmitLFENCEInBasicBlocksWithoutLoads ||
+        OmitLFENCEInBasicBlocksWithOneLoadAndNoStores) {
+      int LoadCount = 0;
+      int StoreCount = 0;
 
-      bool FoundLoad = false;
       for (const MachineInstr &MI : MBB) {
         if (MI.mayLoad()) {
-          FoundLoad = true;
-          break;
+          LoadCount++;
+        }
+        if (MI.mayStore()) {
+          StoreCount++;
         }
       }
-      if (!FoundLoad) {
+
+      if (OmitLFENCEInBasicBlocksWithoutLoads && LoadCount == 0) {
+        continue;
+      }
+
+      if (OmitLFENCEInBasicBlocksWithOneLoadAndNoStores && StoreCount == 0 &&
+          LoadCount <= 1) {
         continue;
       }
     }
diff --git a/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/speculative-execution-side-effect-suppression-omit-lfence-in-bb-with-one-load-no-stores.ll
@@ -0,0 +1,77 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable -x86-seses-omit-lfence-in-bb-with-one-load-no-stores %s -o - | FileCheck %s --check-prefix=CHECK-FLAGGED
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -x86-seses-enable %s -o - | FileCheck %s --check-prefix=CHECK-FULL
+
+define dso_local void @_Z3fooPi(i32* %p) #0 {
+entry:                                            ; has a store and two loads: LFENCEs are expected under both RUN lines
+  %p.addr = alloca i32*, align 8
+  %a = alloca i32, align 4
+  store i32* %p, i32** %p.addr, align 8
+  %0 = load i32*, i32** %p.addr, align 8
+  %1 = load i32, i32* %0, align 4
+  %cmp = icmp eq i32 %1, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %2 = load i32*, i32** %p.addr, align 8          ; result is unused; both expected outputs show %bb.1 with no instructions
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void                                        ; expected as popq+retq; only CHECK-FULL expects an lfence before the popq
+}
+
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; CHECK-FLAGGED: .globl _Z3fooPi                # -- Begin function _Z3fooPi
+; CHECK-FLAGGED-NEXT: .p2align 4, 0x90
+; CHECK-FLAGGED-NEXT: .type _Z3fooPi,@function
+; CHECK-FLAGGED-NEXT:_Z3fooPi:                               # @_Z3fooPi
+; CHECK-FLAGGED-NEXT:.L_Z3fooPi$local:
+; CHECK-FLAGGED-NEXT: .cfi_startproc
+; CHECK-FLAGGED-NEXT:# %bb.0:                                # %entry
+; CHECK-FLAGGED-NEXT: lfence
+; CHECK-FLAGGED-NEXT: pushq %rbp
+; CHECK-FLAGGED-NEXT: .cfi_def_cfa_offset 16
+; CHECK-FLAGGED-NEXT: .cfi_offset %rbp, -16
+; CHECK-FLAGGED-NEXT: movq %rsp, %rbp
+; CHECK-FLAGGED-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-FLAGGED-NEXT: lfence
+; CHECK-FLAGGED-NEXT: movq %rdi, -8(%rbp)
+; CHECK-FLAGGED-NEXT: lfence
+; CHECK-FLAGGED-NEXT: movq -8(%rbp), %rax
+; CHECK-FLAGGED-NEXT: lfence
+; CHECK-FLAGGED-NEXT: cmpl $0, (%rax)
+; CHECK-FLAGGED-NEXT: lfence
+; CHECK-FLAGGED-NEXT: jne .LBB0_2
+; CHECK-FLAGGED-NEXT:# %bb.1:                                # %if.then
+; CHECK-FLAGGED-NEXT:.LBB0_2:                                # %if.end
+; CHECK-FLAGGED-NEXT: popq %rbp
+; CHECK-FLAGGED-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-FLAGGED-NEXT: retq
+
+; CHECK-FULL: .globl _Z3fooPi                # -- Begin function _Z3fooPi
+; CHECK-FULL-NEXT: .p2align 4, 0x90
+; CHECK-FULL-NEXT: .type _Z3fooPi,@function
+; CHECK-FULL-NEXT:_Z3fooPi:                               # @_Z3fooPi
+; CHECK-FULL-NEXT:.L_Z3fooPi$local:
+; CHECK-FULL-NEXT: .cfi_startproc
+; CHECK-FULL-NEXT:# %bb.0:                                # %entry
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: pushq %rbp
+; CHECK-FULL-NEXT: .cfi_def_cfa_offset 16
+; CHECK-FULL-NEXT: .cfi_offset %rbp, -16
+; CHECK-FULL-NEXT: movq %rsp, %rbp
+; CHECK-FULL-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: movq %rdi, -8(%rbp)
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: movq -8(%rbp), %rax
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: cmpl $0, (%rax)
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: jne .LBB0_2
+; CHECK-FULL-NEXT:# %bb.1:                                # %if.then
+; CHECK-FULL-NEXT:.LBB0_2:                                # %if.end
+; CHECK-FULL-NEXT: lfence
+; CHECK-FULL-NEXT: popq %rbp
+; CHECK-FULL-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-FULL-NEXT: retq