diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -656,6 +656,15 @@
   // Deletion info.
   SmallPtrSet<Instruction *, 8> InstructionsToErase;
 
+  // Keeps the load instructions that should be optimized with load coercion.
+  // The key is the load instruction that should be optimized and the mapped
+  // value is the memory access of the memory instruction that the load
+  // depends on.
+  std::map<Instruction *, MemoryAccess *> LoadCoercion;
+
+  // Keeps the load instructions that have been optimized with load coercion.
+  SmallPtrSet<LoadInst *, 8> OptimizedLoads;
+
 public:
   NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
          TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,
@@ -833,6 +842,7 @@
                                     SmallVectorImpl<ValueDFS> &) const;
 
   bool eliminateInstructions(Function &);
+  bool implementLoadCoercion();
   void replaceInstruction(Instruction *, Value *);
   void markInstructionForDeletion(Instruction *);
   void deleteInstructionsInBlock(BasicBlock *);
@@ -1456,6 +1466,14 @@
                           << " to constant " << *C << "\n");
         return createConstantExpression(
             getConstantStoreValueForLoad(C, Offset, LoadType, DL));
+      } else {
+        // Record the load instructions that can be optimized with load
+        // coercion in the LoadCoercion map.
+        const_cast<NewGVN *>(this)->LoadCoercion[LI] = DefiningAccess;
+        // Load coercion occurs before the elimination phase. The loads that
+        // can be optimized with load coercion are not added to any congruence
+        // class. Thus, we do not create any load expression for them.
+        return nullptr;
       }
     }
   } else if (auto *DepLI = dyn_cast<LoadInst>(DepInst)) {
@@ -2977,6 +2995,8 @@
   MemoryToUsers.clear();
   RevisitOnReachabilityChange.clear();
   IntrinsicInstPred.clear();
+  LoadCoercion.clear();
+  OptimizedLoads.clear();
 }
 
 // Assign local DFS number mapping to instructions, and leave space for Value
@@ -3481,6 +3501,17 @@
   verifyIterationSettled(F);
   verifyStoreExpressions();
 
+  // During load coercion, we replace the candidate load instructions with a
+  // new sequence of instructions. Next, we run value numbering, which adds the
+  // new instructions to the right congruence classes. In this way, any
+  // redundant instructions will be optimized away in the elimination phase.
+  // Hence, it makes sense to do load coercion before the elimination phase.
+  if (implementLoadCoercion())
+    // The newly generated instructions need to get a DFS number for the
+    // elimination phase.
+    // TODO: Update DFS numbers faster.
+    ICount = updateDFSNumbers(ICount);
+
   Changed |= eliminateInstructions(F);
 
   // Delete all instructions marked for deletion.
@@ -3816,6 +3847,100 @@
   return nullptr;
 }
 
+// Iterate over the load instructions of the LoadCoercion map and replace them
+// with the right sequence of instructions. Next, check whether any other load
+// in the LoadCoercion map can be optimized with the newly generated
+// instructions. Finally, run value numbering for the new instructions so that
+// any redundant instructions are eliminated in the elimination phase.
+bool NewGVN::implementLoadCoercion() {
+  bool AnythingReplaced = false;
+  for (const auto &P : LoadCoercion) {
+    LoadInst *LoadToOptimize = cast<LoadInst>(P.first);
+    // The LoadCoercion map might have multiple load instructions that can be
+    // optimized with the same sequence of instructions. In this case, we
+    // process only the first load and optimize the other loads with the
+    // sequence of instructions that we emitted for the first load. Here, we
+    // skip the loads that have already been processed.
+    if (OptimizedLoads.count(LoadToOptimize))
+      continue;
+
+    Type *LoadToOptimizeTy = LoadToOptimize->getType();
+    Value *NewValue = nullptr;
+    MemoryAccess *MA = P.second;
+    Instruction *DependingInsn = cast<MemoryUseOrDef>(MA)->getMemoryInst();
+    BasicBlock *DependingInsnBB = DependingInsn->getParent();
+    Instruction *InsnBeforeLoadToOptimize = LoadToOptimize->getPrevNode();
+    Instruction *InsnBeforeTerminatorInDependingInsnBB =
+        DependingInsnBB->getTerminator()->getPrevNode();
+    bool DefUseAreInDifferentBB =
+        DependingInsnBB != LoadToOptimize->getParent();
+    // If the load and the depending instruction are in different basic blocks,
+    // then the new sequence of instructions is emitted at the end of the basic
+    // block of the depending instruction. In this way, the newly generated
+    // instructions can be used by other loads in basic blocks that are
+    // dominated by the basic block of the depending instruction. If the load
+    // and the depending instruction are in the same basic block, then we emit
+    // them just before the load.
+    Instruction *InsertPtr = DefUseAreInDifferentBB
+                                 ? DependingInsnBB->getTerminator()
+                                 : LoadToOptimize;
+    if (StoreInst *Store = dyn_cast<StoreInst>(DependingInsn)) {
+      int Offset = analyzeLoadFromClobberingStore(
+          LoadToOptimizeTy, LoadToOptimize->getPointerOperand(), Store, DL);
+      // Emit the sequence of instructions that replaces the load.
+      NewValue = getStoreValueForLoad(Store->getValueOperand(), Offset,
+                                      LoadToOptimizeTy, InsertPtr, DL);
+    }
+    OptimizedLoads.insert(LoadToOptimize);
+    InstructionsToErase.insert(LoadToOptimize);
+    LoadToOptimize->replaceAllUsesWith(NewValue);
+    LLVM_DEBUG(dbgs() << "Load coercion: The load " << *LoadToOptimize
+                      << " was eliminated and its uses were replaced by "
+                      << *NewValue << "\n");
+
+    // Iterate over the LoadCoercion map and check if there is any other load
+    // instruction that can be replaced with the same sequence of instructions.
+    for (const auto &Q : LoadCoercion) {
+      LoadInst *LI = cast<LoadInst>(Q.first);
+
+      if (LI == LoadToOptimize)
+        continue;
+
+      // Check if the two load instructions have the same type and if the
+      // memory instructions that they depend on have the same memory access.
+      if (LoadToOptimize->getType() == LI->getType() && MA == Q.second) {
+        OptimizedLoads.insert(LI);
+        InstructionsToErase.insert(LI);
+        LI->replaceAllUsesWith(NewValue);
+        LLVM_DEBUG(dbgs() << "Load coercion: The load " << *LI
+                          << " was eliminated and its uses were replaced by "
+                          << *NewValue << "\n");
+      }
+    }
+
+    // Collect the newly generated instructions and run value numbering for
+    // them. In this way, the new instructions will be inserted in the
+    // corresponding congruence classes. In case any of these instructions are
+    // redundant, they will be optimized away in the elimination phase.
+    Instruction *FirstNewlyGeneratedInsn =
+        DefUseAreInDifferentBB
+            ? InsnBeforeTerminatorInDependingInsnBB->getNextNode()
+            : InsnBeforeLoadToOptimize->getNextNode();
+    Instruction *LastNewlyGeneratedInsn = DefUseAreInDifferentBB
+                                              ? DependingInsnBB->getTerminator()
+                                              : LoadToOptimize;
+    for (Instruction *CurInsn = FirstNewlyGeneratedInsn;
+         CurInsn != LastNewlyGeneratedInsn; CurInsn = CurInsn->getNextNode()) {
+      TOPClass->insert(CurInsn);
+      ValueToClass[CurInsn] = TOPClass;
+      valueNumberInstruction(CurInsn);
+      updateProcessedCount(CurInsn);
+    }
+    AnythingReplaced = true;
+  }
+  return AnythingReplaced;
+}
+
 bool NewGVN::eliminateInstructions(Function &F) {
   // This is a non-standard eliminator. The normal way to eliminate is
   // to walk the dominator tree in order, keeping track of available
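Note (not part of the patch): the replacement sequence is produced by the existing VNCoercion helpers that implementLoadCoercion() calls above, namely analyzeLoadFromClobberingStore and getStoreValueForLoad. The following minimal sketch shows how those two calls fit together for a single store/load pair; the free-standing helper name tryForwardStoreToLoad and its standalone framing are illustrative assumptions, while the two VNCoercion calls mirror the patch.

// Illustrative sketch only: drives the same VNCoercion helpers that
// implementLoadCoercion() uses. The helper name is hypothetical.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/VNCoercion.h"

using namespace llvm;

// Returns a value equivalent to what Load would read out of DepStore's
// stored operand, or nullptr if the stored bits cannot be reused.
static Value *tryForwardStoreToLoad(LoadInst *Load, StoreInst *DepStore,
                                    const DataLayout &DL) {
  using namespace VNCoercion;
  Type *LoadTy = Load->getType();
  // A negative offset means the store does not cover the loaded bits.
  int Offset = analyzeLoadFromClobberingStore(
      LoadTy, Load->getPointerOperand(), DepStore, DL);
  if (Offset < 0)
    return nullptr;
  // Emits the coercion sequence (bitcast/lshr/trunc/inttoptr as needed)
  // right before Load and returns the coerced value.
  return getStoreValueForLoad(DepStore->getValueOperand(), Offset, LoadTy,
                              Load, DL);
}

The patch differs from this sketch in one respect: when the load and the clobbering store are in different basic blocks, NewGVN inserts the sequence at the end of the store's block rather than before the load, so dominated loads (see test9 and test11 below) can reuse the same instructions.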
diff --git a/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll b/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll
--- a/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll
+++ b/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll
@@ -12,8 +12,8 @@
 ; NEWGVN-LABEL: @test1(
 ; NEWGVN-NEXT:    store i32 [[V1:%.*]], i32* [[P:%.*]], align 4
 ; NEWGVN-NEXT:    [[P1:%.*]] = bitcast i32* [[P]] to float*
-; NEWGVN-NEXT:    [[V2:%.*]] = load float, float* [[P1]], align 4
-; NEWGVN-NEXT:    ret float [[V2]]
+; NEWGVN-NEXT:    [[TMP1:%.*]] = bitcast i32 [[V1]] to float
+; NEWGVN-NEXT:    ret float [[TMP1]]
 ;
   store i32 %V1, i32* %P
   %P1 = bitcast i32* %P to float*
@@ -33,8 +33,10 @@
 ; NEWGVN-LABEL: @test2(
 ; NEWGVN-NEXT:    store i64* [[V1:%.*]], i64** [[P1:%.*]], align 8
 ; NEWGVN-NEXT:    [[P2:%.*]] = bitcast i64** [[P1]] to float*
-; NEWGVN-NEXT:    [[V2:%.*]] = load float, float* [[P2]], align 4
-; NEWGVN-NEXT:    ret float [[V2]]
+; NEWGVN-NEXT:    [[TMP1:%.*]] = ptrtoint i64* [[V1]] to i64
+; NEWGVN-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; NEWGVN-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
+; NEWGVN-NEXT:    ret float [[TMP3]]
 ;
   store i64* %V1, i64** %P1
   %P2 = bitcast i64** %P1 to float*
@@ -52,8 +54,8 @@
 ; NEWGVN-LABEL: @test3(
 ; NEWGVN-NEXT:    store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4
 ; NEWGVN-NEXT:    [[P2:%.*]] = bitcast i32* [[P1]] to i8*
-; NEWGVN-NEXT:    [[V2:%.*]] = load i8, i8* [[P2]], align 1
-; NEWGVN-NEXT:    ret i8 [[V2]]
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[V1]] to i8
+; NEWGVN-NEXT:    ret i8 [[TMP1]]
 ;
   store i32 %V1, i32* %P1
   %P2 = bitcast i32* %P1 to i8*
@@ -72,8 +74,9 @@
 ; NEWGVN-LABEL: @test4(
 ; NEWGVN-NEXT:    store i64 [[V1:%.*]], i64* [[P1:%.*]], align 4
 ; NEWGVN-NEXT:    [[P2:%.*]] = bitcast i64* [[P1]] to float*
-; NEWGVN-NEXT:    [[V2:%.*]] = load float, float* [[P2]], align 4
-; NEWGVN-NEXT:    ret float [[V2]]
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[V1]] to i32
+; NEWGVN-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; NEWGVN-NEXT:    ret float [[TMP2]]
 ;
   store i64 %V1, i64* %P1
   %P2 = bitcast i64* %P1 to float*
@@ -115,8 +118,8 @@
 ; NEWGVN-LABEL: @test6(
 ; NEWGVN-NEXT:    store i64 [[V1:%.*]], i64* [[P1:%.*]], align 4
 ; NEWGVN-NEXT:    [[P2:%.*]] = bitcast i64* [[P1]] to i8**
-; NEWGVN-NEXT:    [[V2:%.*]] = load i8*, i8** [[P2]], align 8
-; NEWGVN-NEXT:    ret i8* [[V2]]
+; NEWGVN-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[V1]] to i8*
+; NEWGVN-NEXT:    ret i8* [[TMP1]]
 ;
   store i64 %V1, i64* %P1
   %P2 = bitcast i64* %P1 to i8**
@@ -135,8 +138,9 @@
 ; NEWGVN-LABEL: @test7(
 ; NEWGVN-NEXT:    store double [[V1:%.*]], double* [[P1:%.*]], align 8
 ; NEWGVN-NEXT:    [[P2:%.*]] = bitcast double* [[P1]] to i32*
-; NEWGVN-NEXT:    [[V2:%.*]] = load i32, i32* [[P2]], align 4
-; NEWGVN-NEXT:    ret i32 [[V2]]
+; NEWGVN-NEXT:    [[TMP1:%.*]] = bitcast double [[V1]] to i64
+; NEWGVN-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; NEWGVN-NEXT:    ret i32 [[TMP2]]
 ;
   store double %V1, double* %P1
   %P2 = bitcast double* %P1 to i32*
@@ -157,8 +161,9 @@
 ; NEWGVN-NEXT:    store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4
 ; NEWGVN-NEXT:    [[P2:%.*]] = bitcast i32* [[P1]] to i8*
 ; NEWGVN-NEXT:    [[P3:%.*]] = getelementptr i8, i8* [[P2]], i32 2
-; NEWGVN-NEXT:    [[V2:%.*]] = load i8, i8* [[P3]], align 1
-; NEWGVN-NEXT:    ret i8 [[V2]]
+; NEWGVN-NEXT:    [[TMP1:%.*]] = lshr i32 [[V1]], 16
+; NEWGVN-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+; NEWGVN-NEXT:    ret i8 [[TMP2]]
 ;
   store i32 %V1, i32* %P1
   %P2 = bitcast i32* %P1 to i8*
@@ -180,14 +185,12 @@
 ;
 ; NEWGVN-LABEL: @test9(
 ; NEWGVN-NEXT:    store i64 [[V:%.*]], i64* [[P1:%.*]], align 4
-; NEWGVN-NEXT:    [[P3:%.*]] = bitcast i64* [[P1]] to double*
+; NEWGVN-NEXT:    [[TMP1:%.*]] = bitcast i64 [[V]] to double
 ; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
 ; NEWGVN:       T:
-; NEWGVN-NEXT:    [[B:%.*]] = load double, double* [[P3]], align 8
-; NEWGVN-NEXT:    ret double [[B]]
+; NEWGVN-NEXT:    ret double [[TMP1]]
 ; NEWGVN:       F:
-; NEWGVN-NEXT:    [[C:%.*]] = load double, double* [[P3]], align 8
-; NEWGVN-NEXT:    ret double [[C]]
+; NEWGVN-NEXT:    ret double [[TMP1]]
 ;
   %A = load i64 , i64* %P1
   store i64 %V, i64* %P1
@@ -217,11 +220,11 @@
 ; NEWGVN-LABEL: @test10(
 ; NEWGVN-NEXT:    store i32 [[V0:%.*]], i32* [[P:%.*]], align 4
 ; NEWGVN-NEXT:    [[P1:%.*]] = bitcast i32* [[P]] to float*
-; NEWGVN-NEXT:    [[V1:%.*]] = load float, float* [[P1]], align 4
+; NEWGVN-NEXT:    [[TMP1:%.*]] = bitcast i32 [[V0]] to float
 ; NEWGVN-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
-; NEWGVN-NEXT:    [[V2:%.*]] = load i8, i8* [[P2]], align 1
-; NEWGVN-NEXT:    [[I1:%.*]] = insertvalue <{ i8, float }> undef, i8 [[V2]], 0
-; NEWGVN-NEXT:    [[I2:%.*]] = insertvalue <{ i8, float }> [[I1]], float [[V1]], 1
+; NEWGVN-NEXT:    [[TMP2:%.*]] = trunc i32 [[V0]] to i8
+; NEWGVN-NEXT:    [[I1:%.*]] = insertvalue <{ i8, float }> undef, i8 [[TMP2]], 0
+; NEWGVN-NEXT:    [[I2:%.*]] = insertvalue <{ i8, float }> [[I1]], float [[TMP1]], 1
 ; NEWGVN-NEXT:    ret <{ i8, float }> [[I2]]
 ;
   store i32 %V0, i32* %P
@@ -251,16 +254,16 @@
 ;
 ; NEWGVN-LABEL: @test11(
 ; NEWGVN-NEXT:    store i32 [[V0:%.*]], i32* [[P:%.*]], align 4
+; NEWGVN-NEXT:    [[TMP1:%.*]] = bitcast i32 [[V0]] to float
+; NEWGVN-NEXT:    [[TMP2:%.*]] = trunc i32 [[V0]] to i8
 ; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
 ; NEWGVN:       T:
 ; NEWGVN-NEXT:    [[P1:%.*]] = bitcast i32* [[P]] to float*
-; NEWGVN-NEXT:    [[V1:%.*]] = load float, float* [[P1]], align 4
-; NEWGVN-NEXT:    [[I1:%.*]] = insertvalue <{ i8, float }> undef, float [[V1]], 1
+; NEWGVN-NEXT:    [[I1:%.*]] = insertvalue <{ i8, float }> undef, float [[TMP1]], 1
 ; NEWGVN-NEXT:    ret <{ i8, float }> [[I1]]
 ; NEWGVN:       F:
 ; NEWGVN-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
-; NEWGVN-NEXT:    [[V2:%.*]] = load i8, i8* [[P2]], align 1
-; NEWGVN-NEXT:    [[I2:%.*]] = insertvalue <{ i8, float }> undef, i8 [[V2]], 0
+; NEWGVN-NEXT:    [[I2:%.*]] = insertvalue <{ i8, float }> undef, i8 [[TMP2]], 0
 ; NEWGVN-NEXT:    ret <{ i8, float }> [[I2]]
 ;
   store i32 %V0, i32* %P
@@ -295,16 +298,13 @@
 ;
 ; NEWGVN-LABEL: @test12(
 ; NEWGVN-NEXT:    store i32 [[V0:%.*]], i32* [[P:%.*]], align 4
+; NEWGVN-NEXT:    [[TMP1:%.*]] = bitcast i32 [[V0]] to float
 ; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
 ; NEWGVN:       T:
-; NEWGVN-NEXT:    [[P1:%.*]] = bitcast i32* [[P]] to float*
-; NEWGVN-NEXT:    [[V1:%.*]] = load float, float* [[P1]], align 4
-; NEWGVN-NEXT:    [[I1:%.*]] = insertvalue <{ float, float }> undef, float [[V1]], 1
+; NEWGVN-NEXT:    [[I1:%.*]] = insertvalue <{ float, float }> undef, float [[TMP1]], 1
 ; NEWGVN-NEXT:    ret <{ float, float }> [[I1]]
 ; NEWGVN:       F:
-; NEWGVN-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to float*
-; NEWGVN-NEXT:    [[V2:%.*]] = load float, float* [[P2]], align 4
-; NEWGVN-NEXT:    [[I2:%.*]] = insertvalue <{ float, float }> undef, float [[V2]], 0
+; NEWGVN-NEXT:    [[I2:%.*]] = insertvalue <{ float, float }> undef, float [[TMP1]], 0
 ; NEWGVN-NEXT:    ret <{ float, float }> [[I2]]
 ;
   store i32 %V0, i32* %P
@@ -335,10 +335,9 @@
 ; NEWGVN-LABEL: @test13(
 ; NEWGVN-NEXT:    store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4
 ; NEWGVN-NEXT:    [[P2:%.*]] = bitcast i32* [[P1]] to i8*
-; NEWGVN-NEXT:    [[V2:%.*]] = load i8, i8* [[P2]], align 1
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[V1]] to i8
 ; NEWGVN-NEXT:    [[P3:%.*]] = bitcast i32* [[P1]] to i64*
-; NEWGVN-NEXT:    [[V4:%.*]] = trunc i32 [[V1]] to i8
-; NEWGVN-NEXT:    [[V5:%.*]] = add i8 [[V2]], [[V4]]
+; NEWGVN-NEXT:    [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]]
 ; NEWGVN-NEXT:    ret i8 [[V5]]
 ;
   store i32 %V1, i32* %P1
@@ -361,9 +360,8 @@
 ;
 ; NEWGVN-LABEL: @test14(
 ; NEWGVN-NEXT:    store i32 [[V1:%.*]], i32* [[P1:%.*]], align 4
-; NEWGVN-NEXT:    [[P2:%.*]] = bitcast i32* [[P1]] to i8*
-; NEWGVN-NEXT:    [[V2:%.*]] = load i8, i8* [[P2]], align 1
-; NEWGVN-NEXT:    [[V5:%.*]] = add i8 [[V2]], [[V2]]
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[V1]] to i8
+; NEWGVN-NEXT:    [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]]
 ; NEWGVN-NEXT:    ret i8 [[V5]]
 ;
   store i32 %V1, i32* %P1
diff --git a/llvm/test/Transforms/NewGVN/pr14166-xfail.ll b/llvm/test/Transforms/NewGVN/pr14166-xfail.ll
--- a/llvm/test/Transforms/NewGVN/pr14166-xfail.ll
+++ b/llvm/test/Transforms/NewGVN/pr14166-xfail.ll
@@ -1,4 +1,3 @@
-; XFAIL: *
 ; RUN: opt -disable-basic-aa -passes=newgvn -S < %s | FileCheck %s
 ; NewGVN fails this due to missing load coercion
 target datalayout = "e-p:32:32:32"
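Note (not part of the patch): the NEWGVN check lines above follow the llvm/utils/update_test_checks.py format, and the RUN lines exercise the pass through `opt -passes=newgvn`. For completeness, a minimal sketch of driving NewGVN programmatically through the new pass manager is shown below; the harness function runNewGVN and its setup are illustrative assumptions, while NewGVNPass, PassBuilder, and createModuleToFunctionPassAdaptor are standard LLVM APIs.

// Illustrative sketch only: run NewGVN over a module, equivalent to
// `opt -passes=newgvn`. The harness itself is hypothetical.
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/NewGVN.h"

using namespace llvm;

static void runNewGVN(Module &M) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // NewGVN is a function pass; adapt it so it runs on every function in M.
  ModulePassManager MPM;
  MPM.addPass(createModuleToFunctionPassAdaptor(NewGVNPass()));
  MPM.run(M, MAM);
}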