diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -149,6 +149,7 @@
   Instruction *visitPHINode(PHINode &PN);
   Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
   Instruction *visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src);
+  Instruction *swapGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src);
   Instruction *visitGEPOfBitcast(BitCastInst *BCI, GetElementPtrInst &GEP);
   Instruction *visitAllocaInst(AllocaInst &AI);
   Instruction *visitAllocSite(Instruction &FI);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1978,13 +1978,6 @@
     }
   }
 
-  // Note that if our source is a gep chain itself then we wait for that
-  // chain to be resolved before we perform this transformation. This
-  // avoids us creating a TON of code in some cases.
-  if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
-    if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
-      return nullptr; // Wait until our source is folded to completion.
-
   // For constant GEPs, use a more general offset-based folding approach.
   // Only do this for opaque pointers, as the result element type may change.
   Type *PtrTy = Src->getType()->getScalarType();
@@ -2125,6 +2118,33 @@
   return nullptr;
 }
 
+Instruction *InstCombinerImpl::swapGEPOfGEP(GetElementPtrInst &GEP,
+                                            GEPOperator *Src) {
+  // If GEP of GEP cannot be combined into one instruction, and the second GEP
+  // is constant-indexed, we canonicalize by swapping it before the
+  // non-constant-indexed GEP. This potentially allows the application of some
+  // optimizations in visitGEPOfGEP.
+  // Only swap if it doesn't violate the use-def rule, and pointer types are
+  // compatible (opaque ptr or GEP and Src must be same type, meaning they must
+  // both have 1 index).
+  if (Src->hasOneUse() &&
+      ((Src->getPointerOperandType()->isOpaquePointerTy() &&
+        GEP.getPointerOperandType()->isOpaquePointerTy()) ||
+       (Src->getNumIndices() == 1 && GEP.getNumIndices() == 1)) &&
+      !Src->hasAllConstantIndices() && GEP.hasAllConstantIndices()) {
+    // Cannot guarantee inbounds after swapping because the non-const GEP can
+    // have arbitrary sign.
+    Value *NewSrc =
+        Builder.CreateGEP(GEP.getSourceElementType(), Src->getOperand(0),
+                          SmallVector<Value *>(GEP.indices()), Src->getName());
+    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+        Src->getSourceElementType(), NewSrc,
+        SmallVector<Value *>(Src->indices()), GEP.getName());
+    return NewGEP;
+  }
+  return nullptr;
+}
+
 // Note that we may have also stripped an address space cast in between.
 Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
                                                  GetElementPtrInst &GEP) {
@@ -2409,9 +2429,12 @@
     PtrOp = NewGEP;
   }
 
-  if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
+  if (auto *Src = dyn_cast<GEPOperator>(PtrOp)) {
     if (Instruction *I = visitGEPOfGEP(GEP, Src))
       return I;
+    else if (Instruction *I = swapGEPOfGEP(GEP, Src))
+      return I;
+  }
 
   // Skip if GEP source element type is scalable. The type alloc size is unknown
   // at compile-time.
diff --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
--- a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
@@ -1,12 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -opaque-pointers -S | FileCheck %s
+; RUN: opt < %s -passes='instcombine,licm' -opaque-pointers -S | FileCheck %s
 
 ; Constant-indexed GEP instructions in a chain of GEP instructions should be
-; swapped to the end whenever such transformation is valid. This allows them to
-; be merged.
-
-declare void @use(i1)
+; swapped to the front whenever such transformation is valid. This allows them
+; to be merged.
+declare void @use(ptr)
 
-; The constant-indexed GEP instruction should be swapped to the end, even
-; without merging.
+; The constant-indexed GEP instruction should be swapped to the front, even
+; without merging.
 
@@ -24,36 +23,25 @@
   ret ptr %3
 }
 
-; GEP with the last index being a constant should also be swapped.
-define ptr @partialConstant1(ptr %p, i64 %a, i64 %b) {
-; CHECK-LABEL: @partialConstant1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]]
-; CHECK-NEXT:    ret ptr [[TMP1]]
-;
-  %1 = getelementptr inbounds [4 x i32], ptr %p, i64 %a, i64 1
-  %2 = getelementptr inbounds i32, ptr %p, i64 %b
-  ret ptr %2
-}
-
-; Negative test. GEP should not be swapped if the last index is not a constant.
-define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) {
-; CHECK-LABEL: @partialConstant2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]]
-; CHECK-NEXT:    ret ptr [[TMP1]]
+; Negative test. GEP should not be swapped if indices are not constant.
+define ptr @partialConstant(ptr %p, i64 %a, i64 %b) { +; CHECK-LABEL: @partialConstant( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 1, i64 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[B:%.*]] +; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds [4 x i32], ptr %p, i64 1, i64 %a - %2 = getelementptr inbounds i32, ptr %p, i64 %b + %2 = getelementptr inbounds i32, ptr %1, i64 %b ret ptr %2 } -; Constant-indexed GEP are merged after swawpping. +; Constant-indexed GEP are merged after swapping. ; result = ((i32*) p + a) + 3 define ptr @merge(ptr %p, i64 %a) { ; CHECK-LABEL: @merge( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 -; CHECK-NEXT: ret ptr [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds i32, ptr %p, i64 1 %2 = getelementptr inbounds i32, ptr %1, i64 %a @@ -61,67 +49,85 @@ ret ptr %3 } -; Multiple constant-indexed GEP. Note that the first two cannot be merged at -; first, but after the second and third are merged, the result can be merged -; with the first one on the next pass. -; result = (<3 x i32>*) ((i16*) ((i8*) ptr + a) + (a * b)) + 9 +; Multiple constant-indexed GEP. All constant-index GEP will eventually be +; swapped to the front and merged. 
+; result = (i16*) (((i8*) p + 25) + a) + b define ptr @nested(ptr %p, i64 %a, i64 %b) { ; CHECK-LABEL: @nested( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <3 x i32>, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <5 x i32>, ptr [[TMP2]], i64 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, ptr [[TMP5]], i64 1 -; CHECK-NEXT: ret ptr [[TMP6]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 25 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP2]], i64 [[B:%.*]] +; CHECK-NEXT: ret ptr [[TMP3]] ; - %1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1 + %1 = getelementptr inbounds i64, ptr %p, i64 1 %2 = getelementptr inbounds i8, ptr %1, i64 %a - %3 = mul i64 %a, %b - %4 = getelementptr inbounds <5 x i32>, ptr %2, i64 4 - %5 = getelementptr inbounds i16, ptr %4, i64 %3 - %6 = getelementptr inbounds <4 x i32>, ptr %5, i64 1 - ret ptr %6 + %3 = getelementptr inbounds i32, ptr %2, i64 4 + %4 = getelementptr inbounds i16, ptr %3, i64 %b + %5 = getelementptr inbounds i8, ptr %4, i64 1 + ret ptr %5 } ; It is valid to swap if the source operand of the first GEP has multiple uses. 
-define ptr @multipleUses1(ptr %p) { +define ptr @multipleUses1(ptr %p, i64 %a) { ; CHECK-LABEL: @multipleUses1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]] -; CHECK-NEXT: ret ptr [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[A:%.*]] +; CHECK-NEXT: call void @use(ptr [[P]]) +; CHECK-NEXT: ret ptr [[TMP2]] ; - %1 = getelementptr inbounds i32, ptr %p, i64 1 - %2 = ptrtoint ptr %p to i64 - %3 = getelementptr inbounds i32, ptr %1, i64 %2 - ret ptr %3 + %1 = getelementptr inbounds i32, ptr %p, i64 %a + %2 = getelementptr inbounds i32, ptr %1, i64 1 + call void @use(ptr %p) + ret ptr %2 } -; It is valid to swap if the second GEP has multiple uses. +; Negative test. It is not valid to swap if the first GEP has multiple uses. define ptr @multipleUses2(ptr %p, i64 %a) { ; CHECK-LABEL: @multipleUses2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]] -; CHECK-NEXT: call void @use(ptr nonnull [[TMP2]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; CHECK-NEXT: call void @use(ptr [[TMP1]]) ; CHECK-NEXT: ret ptr [[TMP2]] ; - %1 = getelementptr inbounds i32, ptr %p, i64 1 - %2 = getelementptr inbounds i32, ptr %1, i64 %a - call void @use(ptr %2) + %1 = getelementptr inbounds i32, ptr %p, i64 %a + %2 = getelementptr inbounds i32, ptr %1, i64 1 + call void @use(ptr %1) ret ptr %2 } -; Negative test. It is not valid to swap if the first GEP has multiple uses. 
-define ptr @multipleUses3(ptr %p) { -; CHECK-LABEL: @multipleUses3( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]] -; CHECK-NEXT: ret ptr [[TMP3]] +; Test interaction with LICM. +define i64 @licm(ptr %p) { +; CHECK-LABEL: @licm( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P11:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 4 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[P2:%.*]] = getelementptr i64, ptr [[P11]], i64 [[I]] +; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[P2]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[LOAD]] +; CHECK-NEXT: [[INEXT]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[I]], 1000000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: ret i64 [[ADD_LCSSA]] ; - %1 = getelementptr inbounds i32, ptr %p, i64 1 - %2 = ptrtoint ptr %1 to i64 - %3 = getelementptr inbounds i32, ptr %1, i64 %2 - ret ptr %3 +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %inext, %for.body ] + %sum = phi i64 [ 0, %entry ], [ %add, %for.body ] + %p1 = getelementptr i64, ptr %p, i64 %i + %p2 = getelementptr i64, ptr %p1, i64 4 + %load = load i64, ptr %p2 + %add = add nsw i64 %sum, %load + %inext = add nuw nsw i64 %i, 1 + %exitcond = icmp eq i64 %i, 1000000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i64 %add } diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll --- 
a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -8,10 +8,10 @@ ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_EXT2:%.*]] = zext i32 [[CUR_MATCH:%.*]] to i64 -; CHECK-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[WIN:%.*]], i64 [[IDX_EXT2]] ; CHECK-NEXT: [[IDX_EXT1:%.*]] = zext i32 [[BEST_LEN:%.*]] to i64 -; CHECK-NEXT: [[ADD_PTR25:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR4]], i64 [[IDX_EXT1]] -; CHECK-NEXT: [[ADD_PTR36:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR25]], i64 -1 +; CHECK-NEXT: [[ADD_PTR42:%.*]] = getelementptr i8, i8* [[WIN:%.*]], i64 -1 +; CHECK-NEXT: [[ADD_PTR251:%.*]] = getelementptr i8, i8* [[ADD_PTR42]], i64 [[IDX_EXT2]] +; CHECK-NEXT: [[ADD_PTR36:%.*]] = getelementptr i8, i8* [[ADD_PTR251]], i64 [[IDX_EXT1]] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ADD_PTR36]] to i32* ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; CHECK-NEXT: [[CMP7:%.*]] = icmp eq i32 [[TMP1]], [[SCAN_END:%.*]] @@ -20,9 +20,9 @@ ; CHECK-NEXT: br label [[IF_THEN:%.*]] ; CHECK: do.body: ; CHECK-NEXT: [[IDX_EXT:%.*]] = zext i32 [[TMP4:%.*]] to i64 -; CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i8, i8* [[WIN]], i64 [[IDX_EXT1]] -; CHECK-NEXT: [[ADD_PTR22:%.*]] = getelementptr i8, i8* [[ADD_PTR1]], i64 -1 -; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, i8* [[ADD_PTR22]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR46:%.*]] = getelementptr i8, i8* [[WIN]], i64 -1 +; CHECK-NEXT: [[ADD_PTR25:%.*]] = getelementptr i8, i8* [[ADD_PTR46]], i64 [[IDX_EXT1]] +; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr i8, i8* [[ADD_PTR25]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[ADD_PTR3]] to i32* ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP3]], [[SCAN_END]] diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll 
b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll --- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll +++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll @@ -186,8 +186,8 @@ ; address of another member. define ptr @partialConstantMemberAliasing2(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstantMemberAliasing2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[A:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 1 ; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds %struct.C, ptr %p, i64 %a, i32 1 @@ -199,8 +199,8 @@ ; range of the object currently pointed by the non-constant GEP. define ptr @partialConstantMemberAliasing3(ptr %p, i64 %a) { ; CHECK-LABEL: @partialConstantMemberAliasing3( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[A:%.*]], i32 2 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_C:%.*]], ptr [[TMP1]], i64 [[A:%.*]], i32 2 ; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds %struct.C, ptr %p, i64 %a, i32 2 diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll --- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll +++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll @@ -240,8 +240,8 @@ define ptr @geps_combinable_different_elem_type7(ptr %a, i64 %idx) { ; CHECK-LABEL: @geps_combinable_different_elem_type7( -; CHECK-NEXT: [[A2:%.*]] = getelementptr { i32, i32 }, ptr [[A:%.*]], i64 [[IDX:%.*]], i32 1 -; CHECK-NEXT: [[A3:%.*]] = getelementptr i8, ptr [[A2]], i64 4 +; CHECK-NEXT: [[A21:%.*]] = 
getelementptr i8, ptr [[A:%.*]], i64 4 +; CHECK-NEXT: [[A3:%.*]] = getelementptr { i32, i32 }, ptr [[A21]], i64 [[IDX:%.*]], i32 1 ; CHECK-NEXT: ret ptr [[A3]] ; %a2 = getelementptr { i32, i32 }, ptr %a, i64 %idx, i32 1 @@ -251,8 +251,8 @@ define ptr @geps_combinable_different_elem_type8(ptr %a, i64 %idx) { ; CHECK-LABEL: @geps_combinable_different_elem_type8( -; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds { { i32, i32 } }, ptr [[A:%.*]], i64 [[IDX:%.*]], i32 0, i32 1 -; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4 +; CHECK-NEXT: [[A21:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4 +; CHECK-NEXT: [[A3:%.*]] = getelementptr { { i32, i32 } }, ptr [[A21]], i64 [[IDX:%.*]], i32 0, i32 1 ; CHECK-NEXT: ret ptr [[A3]] ; %a2 = getelementptr inbounds { { i32, i32 } }, ptr %a, i64 %idx, i32 0, i32 1 @@ -540,8 +540,8 @@ ; CHECK-NEXT: br label [[JOIN]] ; CHECK: join: ; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 1, [[IF]] ], [ 2, [[ELSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP2]], i64 [[TMP1]] ; CHECK-NEXT: ret ptr [[GEP]] ; br i1 %c, label %if, label %else diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll @@ -44,8 +44,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8, !alias.scope !0 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4 -; 
CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -7 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>* ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8, !alias.scope !0 ; CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x double> [[WIDE_LOAD6]], <4 x double> poison, <4 x i32> @@ -56,8 +55,7 @@ ; CHECK-NEXT: [[REVERSE8:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE8]], <4 x double> poison), !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP10]], i64 -4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP10]], i64 -7 ; CHECK-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>* ; CHECK-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE10]], <4 x double> poison), !alias.scope !3, !noalias !0 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -762,11 +762,9 @@ ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], 
[[VEC_IND]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 2 ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[VEC_IND]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -2 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>* +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>* ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <12 x i32>