diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -29,7 +29,6 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
-#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/ProfDataUtils.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
@@ -182,7 +181,8 @@
   // consisting of instructions exclusively computed for producing the operands
   // of the source instruction.
   void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
-                             bool ForSinking = false);
+                             bool ForSinking = false,
+                             Instruction *SinkPt = nullptr);
 
   // Returns true if the condition of the select is highly predictable.
   bool isSelectHighlyPredictable(const SelectInst *SI);
@@ -377,13 +377,13 @@
       // false operands.
       if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
         std::stack<Instruction *> TrueSlice;
-        getExclBackwardsSlice(TI, TrueSlice, true);
+        getExclBackwardsSlice(TI, TrueSlice, true, SI);
         maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
         TrueSlices.push_back(TrueSlice);
       }
       if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
         std::stack<Instruction *> FalseSlice;
-        getExclBackwardsSlice(FI, FalseSlice, true);
+        getExclBackwardsSlice(FI, FalseSlice, true, SI);
         maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
         FalseSlices.push_back(FalseSlice);
       }
@@ -470,22 +470,6 @@
       auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
       FalseBranch->setDebugLoc(SI->getDebugLoc());
     }
-    // If there was sinking, move any lifetime-end intrinsic calls found in the
-    // StartBlock to the newly-created end block to ensure sound lifetime info
-    // after sinking (in case any use is sinked).
-    else {
-      SmallVector<Instruction *, 2> EndLifetimeCalls;
-      for (Instruction &I : *StartBlock) {
-        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
-          if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
-            EndLifetimeCalls.push_back(&I);
-          }
-        }
-      }
-      for (auto *LC : EndLifetimeCalls) {
-        LC->moveBefore(&*EndBlock->getFirstInsertionPt());
-      }
-    }
 
     // Insert the real conditional branch based on the original condition.
     // If we did not create a new block for one of the 'true' or 'false' paths
@@ -721,6 +705,22 @@
   return false;
 }
 
+// Check if it is safe to move LoadI to the SinkPt.
+// Conservatively assume it is safe only if there is no instruction
+// modifying memory in-between the load and the sink point.
+static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SinkPt) {
+  // Ensure that the instructions are on the basic block.
+  if (LoadI->getParent() != SinkPt->getParent())
+    return false;
+  auto It = LoadI->getIterator();
+  while (&*It != SinkPt) {
+    if (It->mayWriteToMemory())
+      return false;
+    It++;
+  }
+  return true;
+}
+
 // For a given source instruction, collect its backwards dependence slice
 // consisting of instructions exclusively computed for the purpose of producing
 // the operands of the source instruction. As an approximation
@@ -729,7 +729,8 @@
 // form an one-use chain that leads to the source instruction.
 void SelectOptimize::getExclBackwardsSlice(Instruction *I,
                                            std::stack<Instruction *> &Slice,
-                                           bool ForSinking) {
+                                           bool ForSinking,
+                                           Instruction *SinkPt) {
   SmallPtrSet<Instruction *, 2> Visited;
   std::queue<Instruction *> Worklist;
   Worklist.push(I);
@@ -751,6 +752,18 @@
                        isa<SelectInst>(II) || isa<PHINode>(II)))
       continue;
 
+    // Avoid sinking loads in order not to skip state-modifying instructions,
+    // that may alias with the loaded address.
+    // Only allow sinking of loads within the same basic block that are
+    // conservatively proven to be safe.
+    if (ForSinking && II->mayReadFromMemory() && SinkPt) {
+      if (II->getParent() != SinkPt->getParent()) {
+        continue;
+      } else if (!isSafeToSinkLoad(II, SinkPt)) {
+        continue;
+      }
+    }
+
     // Avoid considering instructions with less frequency than the source
     // instruction (i.e., avoid colder code regions of the dependence slice).
     if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll
--- a/llvm/test/CodeGen/X86/select-optimize.ll
+++ b/llvm/test/CodeGen/X86/select-optimize.ll
@@ -185,9 +185,8 @@
   ret i32 %sel
 }
 
-; Cold value operand with load in its one-use dependence slice shoud result
+; Cold value operand with load in its one-use dependence slice should result
 ; into a branch with sinked dependence slice.
-; The lifetime-end intrinsics should be soundly preserved.
 define i32 @expensive_val_operand3(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
 ; CHECK-LABEL: @expensive_val_operand3(
 ; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
@@ -198,19 +197,64 @@
 ; CHECK-NEXT:    br label [[SELECT_END]]
 ; CHECK:       select.end:
 ; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[A]])
 ; CHECK-NEXT:    ret i32 [[SEL]]
 ;
   %load = load i32, ptr %a, align 8
-  call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %a)
   %x = add i32 %load, %b
   %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
   ret i32 %sel
 }
 
-; Multiple uses of the load value operand.
-define i32 @expensive_val_operand4(i32 %a, ptr nocapture %b, i32 %x, i1 %cmp) {
+; Expensive cold value operand with unsafe-to-sink load (partial slice sinking).
+define i32 @expensive_val_operand4(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
 ; CHECK-LABEL: @expensive_val_operand4(
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    call void @free(ptr [[A]])
+; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
+; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
+; CHECK:       select.true.sink:
+; CHECK-NEXT:    [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
+; CHECK-NEXT:    br label [[SELECT_END]]
+; CHECK:       select.end:
+; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+  %load = load i32, ptr %a, align 8
+  call void @free(ptr %a)
+  %x = add i32 %load, %b
+  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
+  ret i32 %sel
+}
+
+; Expensive cold value operand with potentially-unsafe-to-sink load (located
+; in a different basic block and thus unchecked for sinkability).
+define i32 @expensive_val_operand5(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
+; CHECK-LABEL: @expensive_val_operand5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
+; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
+; CHECK:       select.true.sink:
+; CHECK-NEXT:    [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
+; CHECK-NEXT:    br label [[SELECT_END]]
+; CHECK:       select.end:
+; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[BB1]] ]
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+entry:
+  %load = load i32, ptr %a, align 8
+  br label %bb1
+bb1:                                 ; preds = %entry
+  %x = add i32 %load, %b
+  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
+  ret i32 %sel
+}
+
+; Multiple uses of the load value operand.
+define i32 @expensive_val_operand6(i32 %a, ptr nocapture %b, i32 %x, i1 %cmp) {
+; CHECK-LABEL: @expensive_val_operand6(
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SEL]], [[LOAD]]
@@ -450,8 +494,7 @@
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare void @llvm.dbg.value(metadata, metadata, metadata)
 
-; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+declare void @free(ptr nocapture)
 
 !llvm.module.flags = !{!0, !26, !27}
 !0 = !{i32 1, !"ProfileSummary", !1}