diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2025,60 +2025,91 @@
   // For constant GEPs, use a more general offset-based folding approach.
   // Only do this for opaque pointers, as the result element type may change.
   Type *PtrTy = Src->getType()->getScalarType();
-  if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() &&
+  if (PtrTy->isOpaquePointerTy() &&
       (Src->hasOneUse() || Src->hasAllConstantIndices())) {
-    // Split Src into a variable part and a constant suffix.
-    gep_type_iterator GTI = gep_type_begin(*Src);
-    Type *BaseType = GTI.getIndexedType();
-    bool IsFirstType = true;
-    unsigned NumVarIndices = 0;
-    for (auto Pair : enumerate(Src->indices())) {
-      if (!isa<ConstantInt>(Pair.value())) {
-        BaseType = GTI.getIndexedType();
-        IsFirstType = false;
-        NumVarIndices = Pair.index() + 1;
-      }
-      ++GTI;
-    }
+    // Try to merge GEP (Back of BB) into Src (Front of BB) first, if that
+    // fails, swap the two GEP whenever valid and try again. This is valid as
+    // long as GEP's indices do not depend on Src.
+    GetElementPtrInst *Back = &GEP;
+    GetElementPtrInst *Front = cast<GetElementPtrInst>(Src);
+    for (int i = 0; i < 2; std::swap(Back, Front), i++) {
+      // Swap is not valid if GEP's indices depend on Src. For example
+      // a = gep x, 1
+      // b = gep a, func(a), 1
+      // Also if Src is used by other instructions than GEP, merging constant-
+      // indexed Src to non-constant GEP does not reduce instruction count.
+      if (Front == &GEP && !Src->hasOneUse())
+        break;
 
-    // Determine the offset for the constant suffix of Src.
-    APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
-    if (NumVarIndices != Src->getNumIndices()) {
-      // FIXME: getIndexedOffsetInType() does not handled scalable vectors.
-      if (isa<ScalableVectorType>(BaseType))
-        return nullptr;
+      if (Back->hasAllConstantIndices()) {
+        // Split Front into a variable part and a constant suffix.
+        gep_type_iterator GTI = gep_type_begin(Front);
+        Type *BaseType = GTI.getIndexedType();
+        bool IsFirstType = true;
+        unsigned NumVarIndices = 0;
+        for (auto Pair : enumerate(Front->indices())) {
+          if (!isa<ConstantInt>(Pair.value())) {
+            BaseType = GTI.getIndexedType();
+            IsFirstType = false;
+            NumVarIndices = Pair.index() + 1;
+          }
+          ++GTI;
+        }
 
-      SmallVector<Value *> ConstantIndices;
-      if (!IsFirstType)
-        ConstantIndices.push_back(
-            Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
-      append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
-      Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
-    }
+        // Determine the offset for the constant suffix of Front.
+        APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
+        if (NumVarIndices != Front->getNumIndices()) {
+          // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
+          if (isa<ScalableVectorType>(BaseType))
+            continue;
 
-    // Add the offset for GEP (which is fully constant).
-    if (!GEP.accumulateConstantOffset(DL, Offset))
-      return nullptr;
+          SmallVector<Value *> ConstantIndices;
+          if (!IsFirstType)
+            ConstantIndices.push_back(
+                Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
+          append_range(ConstantIndices,
+                       drop_begin(Front->indices(), NumVarIndices));
+          Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
+        }
 
-    // Convert the total offset back into indices.
-    SmallVector<APInt> ConstIndices =
-        DL.getGEPIndicesForOffset(BaseType, Offset);
-    if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero()))
-      return nullptr;
+        // Add the offset for Back (which is fully constant).
+        if (!Back->accumulateConstantOffset(DL, Offset))
+          continue;
 
-    SmallVector<Value *> Indices;
-    append_range(Indices, drop_end(Src->indices(),
-                                   Src->getNumIndices() - NumVarIndices));
-    for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType))
-      Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
+        APInt OffsetOld = Offset;
+        // Convert the total offset back into indices.
+        SmallVector<APInt> ConstIndices =
+            DL.getGEPIndicesForOffset(BaseType, Offset);
+        if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
+          // If both GEP are constant-indexed, and cannot be merged in either
+          // way, convert them to a GEP of i8.
+          if (Front == &GEP && Front->hasAllConstantIndices()) {
+            return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
+                       ? GetElementPtrInst::CreateInBounds(
+                             Builder.getInt8Ty(), Src->getOperand(0),
+                             Builder.getInt(OffsetOld), GEP.getName())
+                       : GetElementPtrInst::Create(
+                             Builder.getInt8Ty(), Src->getOperand(0),
+                             Builder.getInt(OffsetOld), GEP.getName());
+          }
+          continue;
+        }
 
-    return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
-               ? GetElementPtrInst::CreateInBounds(Src->getSourceElementType(),
-                                                   Src->getOperand(0), Indices,
-                                                   GEP.getName())
-               : GetElementPtrInst::Create(Src->getSourceElementType(),
-                                           Src->getOperand(0), Indices,
-                                           GEP.getName());
+        SmallVector<Value *> Indices;
+        append_range(Indices, drop_end(Front->indices(),
+                                       Front->getNumIndices() - NumVarIndices));
+        for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType))
+          Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
+
+        return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
+                   ? GetElementPtrInst::CreateInBounds(
+                         Front->getSourceElementType(), Src->getOperand(0),
+                         Indices, Back->getName())
+                   : GetElementPtrInst::Create(Front->getSourceElementType(),
+                                               Src->getOperand(0), Indices,
+                                               Back->getName());
+      }
+    }
   }
 
   if (Src->getResultElementType() != GEP.getSourceElementType())
diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
--- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
@@ -36,9 +36,8 @@
 ; result = (i8*) p + 10
 define ptr @mergeReverse(ptr %p) {
 ; CHECK-LABEL: @mergeReverse(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, ptr %p, i64 1
   %2 = getelementptr inbounds i8, ptr %1, i64 2
@@ -70,9 +69,8 @@
 ; result = (i8*) p + 14
 define ptr @array2(ptr %p, i64 %a) {
 ; CHECK-LABEL: @array2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [7 x i32], ptr [[P:%.*]], i64 0, i64 3
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 14
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds [7 x i32], ptr %p, i64 0, i64 3
   %2 = getelementptr inbounds i8, ptr %1, i64 2
@@ -82,9 +80,8 @@
 ; result = (i8*) (([3 x i8]*) p + 6) + 2
 define ptr @array3(ptr %p) {
 ; CHECK-LABEL: @array3(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [3 x i8], ptr [[TMP1]], i64 1, i64 1
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [3 x i8], ptr [[P:%.*]], i64 6, i64 2
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, ptr %p, i64 2
   %2 = getelementptr inbounds [3 x i8], ptr %1, i64 1, i64 1
@@ -94,9 +91,8 @@
 ; result = (struct.C*) p + 3
 define ptr @struct1(ptr %p) {
 ; CHECK-LABEL: @struct1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 3
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 3
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, ptr %p, i64 3
   %2 = getelementptr inbounds %struct.C, ptr %1, i64 1
@@ -127,9 +123,8 @@
 ; result = ((struct.C*) p + 1).member2
 define ptr @struct4(ptr %p, i64 %a) {
 ; CHECK-LABEL: @struct4(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 1, i32 2
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, ptr %p, i64 1
   %2 = getelementptr inbounds %struct.C, ptr %1, i64 1
@@ -164,9 +159,8 @@
 ; result = (i8*) &((struct.A*) &((struct.B*) p)[0].member2).member0 + 2
 define ptr @appendIndexReverse(ptr %p) {
 ; CHECK-LABEL: @appendIndexReverse(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP1]], i64 0, i32 1
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 2
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, ptr %p, i64 1
   %2 = getelementptr inbounds %struct.B, ptr %1, i64 0, i32 1
@@ -178,22 +172,20 @@
 ; result = &((struct.C*) p + 2).member1
 define ptr @structMemberAliasing(ptr %p, i64 %a) {
 ; CHECK-LABEL: @structMemberAliasing(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1, i32 2
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 2, i32 1
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, ptr %p, i64 1
   %2 = getelementptr inbounds %struct.C, ptr %1, i64 1, i32 2
   ret ptr %2
 }
 
-; Negative test. Offset of either GEP is not divisible by the other's size.
+; GEP cannot be merged either direction, but can be canonicalized as GEP of i8.
 ; Here i24 is 8-bit aligned.
 define ptr @notDivisible(ptr %p) {
 ; CHECK-LABEL: @notDivisible(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 7
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i24, ptr %p, i64 1
   %2 = getelementptr inbounds i32, ptr %1, i64 1
@@ -206,9 +198,8 @@
 ; result = (i32*) (([4 x i32]*) p + a) + 3
 define ptr @partialConstant1(ptr %p, i64 %a) {
 ; CHECK-LABEL: @partialConstant1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP1]], i64 [[A:%.*]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 [[A:%.*]], i64 3
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
   %2 = getelementptr inbounds [4 x i32], ptr %1, i64 %a, i64 2
@@ -249,9 +240,8 @@
 ; result = &((struct.C*) p + a).member2
 define ptr @partialConstantMemberAliasing1(ptr %p, i64 %a) {
 ; CHECK-LABEL: @partialConstantMemberAliasing1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 1
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[A:%.*]], i32 2
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
   %2 = getelementptr inbounds %struct.C, ptr %1, i64 %a, i32 1
diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -211,8 +211,7 @@
 
 define ptr @geps_combinable_different_elem_type4(ptr %a) {
 ; CHECK-LABEL: @geps_combinable_different_elem_type4(
-; CHECK-NEXT:    [[A2:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 0, i32 1
-; CHECK-NEXT:    [[A3:%.*]] = getelementptr i8, ptr [[A2]], i64 10
+; CHECK-NEXT:    [[A3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 14
 ; CHECK-NEXT:    ret ptr [[A3]]
 ;
   %a2 = getelementptr { i32, i32 }, ptr %a, i32 0, i32 1