diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1483,13 +1483,19 @@
     int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI, DL);
     if (Offset >= 0) {
       // We can coerce a constant load into a load.
-      if (auto *C = dyn_cast<Constant>(lookupOperandLeader(DepLI)))
+      if (auto *C = dyn_cast<Constant>(lookupOperandLeader(DepLI))) {
         if (auto *PossibleConstant =
                 getConstantLoadValueForLoad(C, Offset, LoadType, DL)) {
           LLVM_DEBUG(dbgs() << "Coercing load from load " << *LI
                             << " to constant " << *PossibleConstant << "\n");
           return createConstantExpression(PossibleConstant);
         }
+      } else {
+        // Similarly, we do not create a load expression for the loads that are
+        // optimized with load coercion.
+        const_cast<NewGVN *>(this)->LoadCoercion[LI] = DefiningAccess;
+        return nullptr;
+      }
     }
   } else if (auto *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
     int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI, DL);
@@ -1559,8 +1565,51 @@
                                           DefiningInst, DefiningAccess))
         return CoercionResult;
     }
+  } else {
+    // Here, we try to find loads that can be optimized with load coercion
+    // and that are live on entry.
+    for (const auto &U : DefiningAccess->uses())
+      if (auto *MemUse = dyn_cast<MemoryUse>(U.getUser())) {
+        LoadInst *DependingLoad = dyn_cast<LoadInst>(MemUse->getMemoryInst());
+
+        if (!DependingLoad || LI == DependingLoad)
+          continue;
+
+        // If the two load instructions have the same pointer operand, then
+        // this is not load coercion.
+        if (DependingLoad->getPointerOperand() == LI->getPointerOperand())
+          continue;
+
+        // The DependingLoad should be at least as wide as the load that we
+        // are trying to optimize.
+        if (DL.getTypeSizeInBits(DependingLoad->getType()).getFixedSize() <
+            DL.getTypeSizeInBits(LI->getType()).getFixedSize())
+          continue;
+
+        // If we have already found an optimization opportunity for a load
+        // instruction, then we do not process it again.
+        if (LoadCoercion.count(LI))
+          continue;
+
+        // If the depending load does not have any uses, then we should not do
+        // load coercion because the depending load will be eliminated.
+        if (DependingLoad->getNumUses() == 0)
+          continue;
+
+        // The two loads should be executed in the right order.
+        if (!DT->dominates(DependingLoad, LI))
+          continue;
+
+        performSymbolicLoadCoercion(LI->getType(), LI->getPointerOperand(), LI,
+                                    DependingLoad,
+                                    getMemoryAccess(DependingLoad));
+      }
   }
 
+  // We do not create a load expression for loads optimized with load coercion.
+  if (LoadCoercion.count(LI))
+    return nullptr;
+
   const auto *LE = createLoadExpression(LI->getType(), LoadAddressLeader, LI,
                                         DefiningAccess);
   // If our MemoryLeader is not our defining access, add a use to the
@@ -3890,6 +3939,42 @@
     // Emits the sequence of the instructions that replace the load.
     NewValue = getStoreValueForLoad(Store->getValueOperand(), Offset,
                                     LoadToOptimizeTy, InsertPtr, DL);
+  } else if (LoadInst *Load = dyn_cast<LoadInst>(DependingInsn)) {
+    int Offset = analyzeLoadFromClobberingLoad(
+        LoadToOptimizeTy, LoadToOptimize->getPointerOperand(), Load, DL);
+
+    // If the depending load has already been optimized, then we use the load
+    // that was used to optimize the depending load. In the following example,
+    // %V2 will be optimized by %V1 according to the LoadCoercion map.
+    // Similarly, %V3 will be optimized by %V2.
+    // %P1 = bitcast i8* %P to <4 x i32>*
+    // %V1 = load <4 x i32>, <4 x i32>* %P1
+    // %P2 = bitcast i8* %P to <2 x i32>*
+    // %V2 = load <2 x i32>, <2 x i32>* %P2
+    // %P3 = bitcast i8* %P to i32*
+    // %V3 = load i32, i32* %P3
+    // We first process %V2. Next, we process %V3. Since %V2 has already been
+    // optimized, we use %V1 to optimize %V3. The code after load coercion and
+    // before the elimination phase will be:
+    // %P1 = bitcast i8* %P to <4 x i32>*
+    // %V1 = load <4 x i32>, <4 x i32>* %P1, align 16
+    // %P2 = bitcast i8* %P to <2 x i32>*
+    // %0 = bitcast <4 x i32> %V1 to i128
+    // %1 = trunc i128 %0 to i64
+    // %2 = bitcast i64 %1 to <2 x i32>
+    // %P3 = bitcast i8* %P to i32*
+    // %3 = bitcast <4 x i32> %V1 to i128
+    // %4 = trunc i128 %3 to i32
+    LoadInst *DependingLoad = Load;
+    if (OptimizedLoads.count(DependingLoad)) {
+      while (LoadCoercion.count(DependingLoad))
+        DependingLoad =
+            cast<LoadInst>(cast<MemoryUseOrDef>(LoadCoercion[DependingLoad])
+                               ->getMemoryInst());
+    }
+    // Emits the sequence of the instructions that replace the load.
+    NewValue = getLoadValueForLoad(DependingLoad, Offset, LoadToOptimizeTy,
+                                   InsertPtr, DL);
   }
   OptimizedLoads.insert(LoadToOptimize);
   InstructionsToErase.insert(LoadToOptimize);
@@ -3907,8 +3992,10 @@
       continue;
 
     // Check if the two load instructions have the same type and if the memory
-    // instructions that they depend on have the same memory access.
-    if (LoadToOptimize->getType() == LI->getType() && MA == P.second) {
+    // instructions that they depend on have the same memory access and if LI
+    // is executed after LoadToOptimize.
+    if (LoadToOptimize->getType() == LI->getType() && MA == P.second &&
+        DT->dominates(LoadToOptimize, LI)) {
       OptimizedLoads.insert(LI);
       InstructionsToErase.insert(LI);
       LI->replaceAllUsesWith(NewValue);
diff --git a/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll
--- a/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll
+++ b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll
@@ -13,9 +13,8 @@
 ; NEWGVN-LABEL: @test1(
 ; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8*
-; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1
-; NEWGVN-NEXT: [[V3:%.*]] = trunc i32 [[V1]] to i8
-; NEWGVN-NEXT: [[V4:%.*]] = add i8 [[V2]], [[V3]]
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8
+; NEWGVN-NEXT: [[V4:%.*]] = add i8 [[TMP1]], [[TMP1]]
 ; NEWGVN-NEXT: ret i8 [[V4]]
 ;
   %V1 = load i32, i32* %P1
@@ -42,10 +41,9 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <2 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4
-; NEWGVN-NEXT: [[V3:%.*]] = bitcast <2 x i32> [[V1]] to i64
-; NEWGVN-NEXT: [[V4:%.*]] = trunc i64 [[V3]] to i32
-; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[V2]], [[V4]]
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; NEWGVN-NEXT: [[V5:%.*]] = add i32 [[TMP1]], [[TMP1]]
 ; NEWGVN-NEXT: ret i32 [[V5]]
 ;
 entry:
@@ -76,9 +74,10 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <2 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <2 x i32>, i32 }> undef, <2 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32 }> [[I1]], i32 [[V2]], 1
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32 }> [[I1]], i32 [[TMP1]], 1
 ; NEWGVN-NEXT: ret <{ <2 x i32>, i32 }> [[I2]]
 ;
 entry:
@@ -142,8 +141,10 @@
 ; NEWGVN-NEXT: [[V0:%.*]] = load i32, i32* [[P0]], align 4
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P]] to <2 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <2 x i32>, i32, i32 }> undef, <2 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, i32 }> [[I1]], i32 [[V0]], 1
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, i32 }> [[I1]], i32 [[TMP1]], 1
 ; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, i32 }> [[I2]], i32 [[V0]], 2
 ; NEWGVN-NEXT: ret <{ <2 x i32>, i32, i32 }> [[I3]]
 ;
@@ -182,13 +183,15 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <4 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to <2 x i32>*
-; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
+; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P3]], align 4
+; NEWGVN-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP0]] to i32
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> undef, <4 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[V2]], 1
-; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[V3]], 2
-; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[V2]], 3
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[TMP2]], 1
+; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[TMP3]], 2
+; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[TMP2]], 3
 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I4]]
 ;
 entry:
@@ -220,13 +223,12 @@
 ; NEWGVN-LABEL: @test7(
 ; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to float*
-; NEWGVN-NEXT: [[V2:%.*]] = load float, float* [[P2]], align 4
+; NEWGVN-NEXT: [[TMP1:%.*]] = bitcast i32 [[V1]] to float
 ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
 ; NEWGVN: T:
-; NEWGVN-NEXT: ret float [[V2]]
+; NEWGVN-NEXT: ret float [[TMP1]]
 ; NEWGVN: F:
-; NEWGVN-NEXT: [[V3:%.*]] = bitcast i32 [[V1]] to float
-; NEWGVN-NEXT: ret float [[V3]]
+; NEWGVN-NEXT: ret float [[TMP1]]
 ;
   %V1 = load i32, i32* %P1
   %P2 = bitcast i32* %P1 to float*
@@ -262,11 +264,13 @@
 ; NEWGVN-NEXT: [[V0:%.*]] = load i32, i32* [[P0]], align 4
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P]] to <2 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> undef, <2 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I1]], i32 [[V0]], 1
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I1]], i32 [[TMP1]], 1
 ; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I2]], i32 [[V0]], 2
-; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I3]], i32 [[V0]], 3
-; NEWGVN-NEXT: [[I5:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I4]], i32 [[V0]], 4
+; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I3]], i32 [[TMP1]], 3
+; NEWGVN-NEXT: [[I5:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I4]], i32 [[TMP1]], 4
 ; NEWGVN-NEXT: ret <{ <2 x i32>, i32, i32, i32, i32 }> [[I5]]
 ;
 entry:
@@ -310,13 +314,16 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <4 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to <2 x i32>*
-; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
+; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P3]], align 4
+; NEWGVN-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+; NEWGVN-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> undef, <4 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[V2]], 1
-; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[V3]], 2
-; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[V2]], 3
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[TMP2]], 1
+; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[TMP4]], 2
+; NEWGVN-NEXT: [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[TMP2]], 3
 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I4]]
 ;
 entry:
@@ -359,16 +366,18 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <4 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <4 x i32>, <4 x i32>* [[P1]], align 16
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> undef, <4 x i32> [[V1]], 0
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
+; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
+; NEWGVN-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP0]] to i32
 ; NEWGVN-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
 ; NEWGVN: T:
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to <2 x i32>*
-; NEWGVN-NEXT: [[V2:%.*]] = load <2 x i32>, <2 x i32>* [[P2]], align 8
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], <2 x i32> [[V2]], 1
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], <2 x i32> [[TMP2]], 1
 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32 }> [[I2]]
 ; NEWGVN: F:
 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V3:%.*]] = load i32, i32* [[P3]], align 4
-; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], i32 [[V3]], 2
+; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], i32 [[TMP3]], 2
 ; NEWGVN-NEXT: ret <{ <4 x i32>, <2 x i32>, i32 }> [[I3]]
 ;
 entry:
@@ -409,12 +418,13 @@
 ; NEWGVN-NEXT: [[P1:%.*]] = bitcast i8* [[P:%.*]] to <2 x i32>*
 ; NEWGVN-NEXT: [[V1:%.*]] = load <2 x i32>, <2 x i32>* [[P1]], align 8
 ; NEWGVN-NEXT: [[P2:%.*]] = bitcast i8* [[P]] to i32*
-; NEWGVN-NEXT: [[V2:%.*]] = load i32, i32* [[P2]], align 4
+; NEWGVN-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; NEWGVN-NEXT: [[P3:%.*]] = bitcast i8* [[P]] to float*
-; NEWGVN-NEXT: [[V3:%.*]] = load float, float* [[P3]], align 4
+; NEWGVN-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
 ; NEWGVN-NEXT: [[I1:%.*]] = insertvalue <{ <2 x i32>, i32, float }> undef, <2 x i32> [[V1]], 0
-; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I1]], i32 [[V2]], 1
-; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I2]], float [[V3]], 2
+; NEWGVN-NEXT: [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I1]], i32 [[TMP1]], 1
+; NEWGVN-NEXT: [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I2]], float [[TMP2]], 2
 ; NEWGVN-NEXT: ret <{ <2 x i32>, i32, float }> [[I3]]
 ;
 entry:
@@ -443,13 +453,11 @@
 ;
 ; NEWGVN-LABEL: @test12(
 ; NEWGVN-NEXT: [[V1:%.*]] = load i32, i32* [[P1:%.*]], align 4
-; NEWGVN-NEXT: [[P2:%.*]] = bitcast i32* [[P1]] to i8*
-; NEWGVN-NEXT: [[V2:%.*]] = load i8, i8* [[P2]], align 1
-; NEWGVN-NEXT: [[V3:%.*]] = trunc i32 [[V1]] to i8
+; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V1]] to i8
 ; NEWGVN-NEXT: store i32 [[V:%.*]], i32* [[P1]], align 4
-; NEWGVN-NEXT: [[TMP1:%.*]] = trunc i32 [[V]] to i8
-; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[V2]], [[V3]]
-; NEWGVN-NEXT: [[V6:%.*]] = add i8 [[TMP1]], [[V5]]
+; NEWGVN-NEXT: [[TMP2:%.*]] = trunc i32 [[V]] to i8
+; NEWGVN-NEXT: [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]]
+; NEWGVN-NEXT: [[V6:%.*]] = add i8 [[TMP2]], [[V5]]
 ; NEWGVN-NEXT: ret i8 [[V6]]
 ;
   %V1 = load i32, i32* %P1